3 namespace MediaWiki\Tests\Rest\Handler
;
5 use Composer\Semver\Semver
;
11 use MediaWiki\Page\PageIdentity
;
12 use MediaWiki\Parser\ParserCacheFactory
;
13 use MediaWiki\Parser\Parsoid\Config\PageConfigFactory
;
14 use MediaWiki\Parser\Parsoid\HtmlToContentTransform
;
15 use MediaWiki\Parser\Parsoid\HtmlTransformFactory
;
16 use MediaWiki\Parser\RevisionOutputCache
;
17 use MediaWiki\Permissions\UltimateAuthority
;
18 use MediaWiki\Rest\Handler\Helper\HtmlInputTransformHelper
;
19 use MediaWiki\Rest\Handler\Helper\ParsoidFormatHelper
;
20 use MediaWiki\Rest\Handler\ParsoidHandler
;
21 use MediaWiki\Rest\HttpException
;
22 use MediaWiki\Rest\LocalizedHttpException
;
23 use MediaWiki\Rest\RequestData
;
24 use MediaWiki\Rest\RequestInterface
;
25 use MediaWiki\Rest\Response
;
26 use MediaWiki\Rest\ResponseFactory
;
27 use MediaWiki\Revision\MutableRevisionRecord
;
28 use MediaWiki\Revision\RevisionLookup
;
29 use MediaWiki\Revision\RevisionRecord
;
30 use MediaWiki\Revision\SlotRecord
;
31 use MediaWiki\Tests\Rest\RestTestTrait
;
32 use MediaWiki\Tests\Unit\DummyServicesTrait
;
33 use MediaWiki\Title\TitleValue
;
34 use MediaWiki\User\UserIdentityValue
;
35 use MediaWikiIntegrationTestCase
;
36 use NullStatsdDataFactory
;
38 use PHPUnit\Framework\MockObject\MockObject
;
39 use Wikimedia\Message\MessageValue
;
40 use Wikimedia\Parsoid\Config\DataAccess
;
41 use Wikimedia\Parsoid\Config\PageConfig
;
42 use Wikimedia\Parsoid\Config\SiteConfig
;
43 use Wikimedia\Parsoid\Core\ClientError
;
44 use Wikimedia\Parsoid\Core\ResourceLimitExceededException
;
45 use Wikimedia\Parsoid\DOM\Document
;
46 use Wikimedia\Parsoid\Parsoid
;
51 * @covers \MediaWiki\Rest\Handler\ParsoidHandler
52 * @covers \MediaWiki\Parser\Parsoid\HtmlToContentTransform
54 class ParsoidHandlerTest
extends MediaWikiIntegrationTestCase
{
55 use DummyServicesTrait
;
59 * Default request attributes, see ParsoidHandler::getRequestAttributes()
61 private const DEFAULT_ATTRIBS
= [
65 'errorEnc' => 'plain',
68 'offsetType' => 'byte',
72 'domain' => 'wiki.example.com',
74 'offsetType' => 'byte',
76 'reqId' => 'test+test+test',
77 'userAgent' => 'UTAgent',
78 'htmlVariantLanguage' => null,
79 'outputContentVersion' => Parsoid
::AVAILABLE_VERSIONS
[0],
83 /** @var string Imperfect wikitext to be preserved if selser is applied. Corresponds to Selser.html. */
84 private const IMPERFECT_WIKITEXT
= "<div >Turaco</DIV>";
86 /** @var string Normalized version of IMPERFECT_WIKITEXT, expected when no selser is applied. */
87 private const NORMALIZED_WIKITEXT
= "<div>Turaco</div>";
89 public function setUp(): void
{
90 // enable Pig Latin variant conversion
91 $this->overrideConfigValues( [
92 'UsePigLatinVariant' => true,
93 'ParsoidSettings' => [
100 private function createRouter( $authority, $request ) {
101 return $this->newRouter( [
102 'authority' => $authority,
103 'request' => $request,
107 private function newParsoidHandler( $methodOverrides = [], $serviceOverrides = [] ): ParsoidHandler
{
110 $revisionLookup = $this->getServiceContainer()->getRevisionLookup();
111 $dataAccess = $serviceOverrides['ParsoidDataAccess'] ??
$this->getServiceContainer()->getParsoidDataAccess();
112 $siteConfig = $serviceOverrides['ParsoidSiteConfig'] ??
$this->getServiceContainer()->getParsoidSiteConfig();
113 $pageConfigFactory = $serviceOverrides['ParsoidPageConfigFactory']
114 ??
$this->getServiceContainer()->getParsoidPageConfigFactory();
116 $handler = new class (
123 ) extends ParsoidHandler
{
127 public function __construct(
129 RevisionLookup
$revisionLookup,
130 SiteConfig
$siteConfig,
131 PageConfigFactory
$pageConfigFactory,
132 DataAccess
$dataAccess,
142 $this->testCase
= $testCase;
143 $this->overrides
= $overrides;
146 protected function parseHTML( string $html, bool $validateXMLNames = false ): Document
{
147 if ( isset( $this->overrides
['parseHTML'] ) ) {
148 return $this->overrides
['parseHTML']( $html, $validateXMLNames );
151 return parent
::parseHTML(
157 protected function newParsoid(): Parsoid
{
158 if ( isset( $this->overrides
['newParsoid'] ) ) {
159 return $this->overrides
['newParsoid']();
162 return parent
::newParsoid();
165 public function getRequest(): RequestInterface
{
166 if ( isset( $this->overrides
['getRequest'] ) ) {
167 return $this->overrides
['getRequest']();
170 return parent
::getRequest();
173 protected function getHtmlInputTransformHelper(
177 ): HtmlInputTransformHelper
{
178 if ( isset( $this->overrides
['getHtmlInputHelper'] ) ) {
179 return $this->overrides
['getHtmlInputHelper']();
182 return parent
::getHtmlInputTransformHelper(
189 public function execute(): Response
{
190 ParsoidHandlerTest
::fail( 'execute was not expected to be called' );
193 public function &getRequestAttributes(): array {
194 if ( isset( $this->overrides
['getRequestAttributes'] ) ) {
195 return $this->overrides
['getRequestAttributes']();
198 return parent
::getRequestAttributes();
201 public function acceptable( array &$attribs ): bool {
202 if ( isset( $this->overrides
['acceptable'] ) ) {
203 return $this->overrides
['acceptable']( $attribs );
206 return parent
::acceptable( $attribs );
209 public function tryToCreatePageConfig(
210 array $attribs, ?
string $wikitext = null, bool $html2WtMode = false
212 if ( isset( $this->overrides
['tryToCreatePageConfig'] ) ) {
213 return $this->overrides
['tryToCreatePageConfig'](
214 $attribs, $wikitext, $html2WtMode
218 'pagelanguage' => $this->testCase
->createLanguageMock( 'en' ),
221 return parent
::tryToCreatePageConfig(
222 $attribs, $wikitext, $html2WtMode
226 public function wt2html(
227 PageConfig
$pageConfigConfig,
229 ?
string $wikitext = null
231 return parent
::wt2html(
238 public function html2wt( $page, array $attribs, string $html ) {
239 return parent
::html2wt(
246 public function pb2pb( array $attribs ) {
247 return parent
::pb2pb( $attribs );
250 public function updateRedLinks(
251 PageConfig
$pageConfig,
255 return parent
::updateRedLinks(
262 public function languageConversion(
263 PageConfig
$pageConfig,
267 return parent
::languageConversion(
275 $authority = new UltimateAuthority( new UserIdentityValue( 0, '127.0.0.1' ) );
276 $request = new RequestData( [ 'method' => $method ] );
277 $router = $this->createRouter( $authority, $request );
280 $formatter = $this->getDummyTextFormatter( true );
282 /** @var ResponseFactory|MockObject $responseFactory */
283 $responseFactory = new ResponseFactory( [ 'qqx' => $formatter ] );
291 $this->createHookContainer(),
292 $this->getSession( true )
299 * @param PageIdentity $page
300 * @param int|string|RevisionRecord|null $revIdOrText
304 private function getPageConfig( PageIdentity
$page, $revIdOrText = null ): PageConfig
{
306 if ( is_string( $revIdOrText ) ) {
307 $rev = new MutableRevisionRecord( $page );
308 $rev->setContent( SlotRecord
::MAIN
, new WikitextContent( $revIdOrText ) );
310 // may be null or an int or a RevisionRecord
314 return $this->getServiceContainer()->getParsoidPageConfigFactory()->create( $page, null, $rev );
317 private function getPageConfigFactory( PageIdentity
$page ): PageConfigFactory
{
318 /** @var PageConfigFactory|MockObject $pageConfigFactory */
319 $pageConfigFactory = $this->createNoOpMock( PageConfigFactory
::class, [ 'create' ] );
320 $pageConfigFactory->method( 'create' )->willReturn( $this->getPageConfig( $page ) );
321 return $pageConfigFactory;
324 private function getTextFromFile( string $name ): string {
325 return trim( file_get_contents( __DIR__
. "/data/Transform/$name" ) );
328 private function getJsonFromFile( string $name ): array {
329 $text = $this->getTextFromFile( $name );
330 return json_decode( $text, JSON_OBJECT_AS_ARRAY
);
333 // Mostly lifted from the contentTypeMatcher in tests/api-testing/REST/Transform.js
334 private function contentTypeMatcher( string $expected, string $actual ): bool {
335 if ( $expected === 'application/json' ) {
336 return $actual === $expected;
339 $pattern = '/^([-\w]+\/[-\w]+); charset=utf-8; profile="https:\/\/www.mediawiki.org\/wiki\/Specs\/([-\w]+)\/(\d+\.\d+\.\d+)"$/';
341 preg_match( $pattern, $expected, $expectedParts );
342 if ( !$expectedParts ) {
345 [ , $expectedMime, $expectedSpec, $expectedVersion ] = $expectedParts;
347 preg_match( $pattern, $actual, $actualParts );
348 if ( !$actualParts ) {
351 [ , $actualMime, $actualSpec, $actualVersion ] = $actualParts;
353 // Match version using caret semantics
354 if ( !Semver
::satisfies( $actualVersion, "^{$expectedVersion}" ) ) {
358 if ( $actualMime !== $expectedMime ||
$actualSpec !== $expectedSpec ) {
365 public function provideHtml2wt() {
366 $profileVersion = '2.6.0';
367 $wikitextProfileUri = 'https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0';
368 $htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
369 $dataParsoidProfileUri = 'https://www.mediawiki.org/wiki/Specs/data-parsoid/' . $profileVersion;
371 $wikiTextContentType = "text/plain; charset=utf-8; profile=\"$wikitextProfileUri\"";
372 $htmlContentType = "text/html;profile=\"$htmlProfileUri\"";
373 $dataParsoidContentType = "application/json;profile=\"$dataParsoidProfileUri\"";
376 'content-type' => $htmlContentType,
379 // NOTE: profile version 999 is a placeholder for a future feature, see T78676
380 $htmlContentType999 = 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"';
382 'content-type' => $htmlContentType999,
385 // should convert html to wikitext ///////////////////////////////////
386 $html = $this->getTextFromFile( 'MainPage-data-parsoid.html' );
388 'MediaWiki has been successfully installed',
389 '== Getting started ==',
393 yield
'should convert html to wikitext' => [
399 // should load original wikitext by revision id ////////////////////
401 'oldid' => 1, // will be replaced by the actual revid
403 yield
'should load original wikitext by revision id' => [
409 // should accept original wikitext in body ////////////////////
410 $originalWikitext = $this->getTextFromFile( 'OriginalMainPage.wikitext' );
416 'content-type' => $wikiTextContentType,
418 'body' => $originalWikitext,
423 yield
'should accept original wikitext in body' => [
426 $expectedText, // TODO: ensure it's actually used!
429 // should use original html for selser (default) //////////////////////
430 $originalDataParsoid = $this->getJsonFromFile( 'MainPage-original.data-parsoid' );
433 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
436 'headers' => $htmlHeaders,
437 'body' => $this->getTextFromFile( 'MainPage-original.html' ),
441 'content-type' => $dataParsoidContentType,
443 'body' => $originalDataParsoid
448 yield
'should use original html for selser (default)' => [
454 // should use original html for selser (1.1.1, meta) ///////////////////
457 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
461 // XXX: If this is required anyway, how do we know we are using the
462 // version given in the HTML?
463 'content-type' => 'text/html; profile="mediawiki.org/specs/html/1.1.1"',
465 'body' => $this->getTextFromFile( 'MainPage-data-parsoid-1.1.1.html' ),
469 'content-type' => $dataParsoidContentType,
471 'body' => $originalDataParsoid
476 yield
'should use original html for selser (1.1.1, meta)' => [
482 // should accept original html for selser (1.1.1, headers) ////////////
485 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
489 // Set the schema version to 1.1.1!
490 'content-type' => 'text/html; profile="mediawiki.org/specs/html/1.1.1"',
492 // No schema version in HTML
493 'body' => $this->getTextFromFile( 'MainPage-original.html' ),
497 'content-type' => $dataParsoidContentType,
499 'body' => $originalDataParsoid
504 yield
'should use original html for selser (1.1.1, headers)' => [
510 // Return original wikitext when HTML doesn't change ////////////////////////////
511 // New and old html are identical, which should produce no diffs
512 // and reuse the original wikitext.
513 $html = $this->getTextFromFile( 'Selser.html' );
515 // Original wikitext (to be preserved by selser)
516 $originalWikitext = self
::IMPERFECT_WIKITEXT
;
518 // Normalized wikitext (when no selser is applied)
519 $normalizedWikitext = self
::NORMALIZED_WIKITEXT
;
521 $dataParsoid = [ // Per Selser.html
523 'mwAA' => [ 'dsr' => [ 0, 19, 0, 0 ] ],
524 'mwAg' => [ 'stx' => 'html', 'dsr' => [ 0, 19, 7, 6 ] ],
530 'oldid' => 1, // Will be replaced by the revision ID of the default test page
532 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
535 'headers' => $htmlHeaders,
536 // original HTML is the same as the new HTML
540 'body' => $dataParsoid,
546 yield
'selser should return original wikitext if the HTML didn\'t change (original HTML given)' => [
549 [ $originalWikitext ], // Returns original wikitext, because HTML didn't change.
552 unset( $attribs['opts']['original'] );
553 yield
'selser should return original wikitext if the HTML didn\'t change (original HTML from ParserCache)' => [
556 [ $originalWikitext ], // Returns original wikitext, because HTML didn't change.
559 // Should fall back to non-selective serialization. //////////////////
560 // Without the original wikitext, use non-selective serialization.
562 // No wikitext, no revid/oldid
564 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
567 'headers' => $htmlHeaders,
568 // original HTML is the same as the new HTML
572 'body' => $dataParsoid,
577 yield
'Should fall back to non-selective serialization' => [
580 [ $normalizedWikitext ],
583 // should apply data-parsoid to duplicated ids /////////////////////////
587 'mwBB' => [ 'autoInsertedEnd' => true, 'stx' => 'html' ]
590 $html = '<html><body id="mwAA"><div id="mwBB">data-parsoid test</div>' .
591 '<div id="mwBB">data-parsoid test</div></body></html>';
592 $originalHtml = '<html><body id="mwAA"><div id="mwBB">data-parsoid test</div></body></html>';
596 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
599 'headers' => $htmlHeaders,
600 'body' => $originalHtml
603 'body' => $dataParsoid,
608 yield
'should apply data-parsoid to duplicated ids' => [
611 [ '<div>data-parsoid test<div>data-parsoid test' ],
614 // should ignore data-parsoid if the input format is not pagebundle ////////////////////////
615 $html = '<html><body id="mwAA"><div id="mwBB">data-parsoid test</div>' .
616 '<div id="mwBB">data-parsoid test</div></body></html>';
617 $originalHtml = '<html><body id="mwAA"><div id="mwBB">data-parsoid test</div></body></html>';
621 'from' => ParsoidFormatHelper
::FORMAT_HTML
,
624 'headers' => $htmlHeaders,
625 'body' => $originalHtml
628 // This has 'autoInsertedEnd' => true, which would cause
629 // closing </div> tags to be omitted.
630 'body' => $dataParsoid,
635 yield
'should ignore data-parsoid if the input format is not pagebundle' => [
638 [ '<div>data-parsoid test</div><div>data-parsoid test</div>' ],
641 // should apply original data-mw ///////////////////////////////////////
642 $html = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">hi</p>';
643 $originalHtml = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">ho</p>';
644 $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ];
650 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ],
651 'params' => [ '1' => [ 'wt' => 'hi' ] ],
660 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
663 'headers' => $htmlHeaders,
664 'body' => $originalHtml
667 'body' => $dataParsoid,
670 'body' => $dataMediaWiki,
675 yield
'should apply original data-mw' => [
681 // should give precedence to inline data-mw over original ////////
682 $html = '<p about="#mwt1" typeof="mw:Transclusion" data-mw=\'{"parts":[{"template":{"target":{"wt":"1x","href":"./Template:1x"},"params":{"1":{"wt":"hi"}},"i":0}}]}\' id="mwAQ">hi</p>';
683 $originalHtml = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">ho</p>';
684 $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ];
685 $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts!
688 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
691 'headers' => $htmlHeaders,
692 'body' => $originalHtml
695 'body' => $dataParsoid,
698 'body' => $dataMediaWiki,
703 yield
'should give precedence to inline data-mw over original' => [
709 // should not apply original data-mw if modified is supplied ///////////
710 $html = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">hi</p>';
711 $originalHtml = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">ho</p>';
712 $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ];
713 $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts!
714 $dataMediaWikiModified = [
719 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ],
720 'params' => [ '1' => [ 'wt' => 'hi' ] ],
729 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
730 'data-mw' => [ // modified data
731 'body' => $dataMediaWikiModified,
735 'headers' => $htmlHeaders999,
736 'body' => $originalHtml
739 'body' => $dataParsoid,
741 'data-mw' => [ // original data
742 'body' => $dataMediaWiki,
747 yield
'should not apply original data-mw if modified is supplied' => [
753 // should apply original data-mw when modified is absent (captions 1) ///////////
754 $html = $this->getTextFromFile( 'Image.html' );
755 $dataParsoid = [ 'ids' => [
756 'mwAg' => [ 'optList' => [ [ 'ck' => 'caption', 'ak' => 'Testing 123' ] ] ],
757 'mwAw' => [ 'a' => [ 'href' => './File:Foobar.jpg' ], 'sa' => [] ],
759 'a' => [ 'resource' => './File:Foobar.jpg', 'height' => '28', 'width' => '240' ],
760 'sa' => [ 'resource' => 'File:Foobar.jpg' ]
763 $dataMediaWiki = [ 'ids' => [ 'mwAg' => [ 'caption' => 'Testing 123' ] ] ];
767 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
770 'body' => $dataParsoid,
772 'data-mw' => [ // original data
773 'body' => $dataMediaWiki,
776 'headers' => $htmlHeaders999,
782 yield
'should apply original data-mw when modified is absent (captions 1)' => [
784 $html, // modified HTML
785 [ '[[File:Foobar.jpg|Testing 123]]' ],
788 // should give precedence to inline data-mw over modified (captions 2) /////////////
789 $htmlModified = $this->getTextFromFile( 'Image-data-mw.html' );
790 $dataMediaWikiModified = [
792 'mwAg' => [ 'caption' => 'Testing 123' ]
798 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
800 'body' => $dataMediaWikiModified,
804 'body' => $dataParsoid,
806 'data-mw' => [ // original data
807 'body' => $dataMediaWiki,
810 'headers' => $htmlHeaders999,
816 yield
'should give precedence to inline data-mw over modified (captions 2)' => [
818 $htmlModified, // modified HTML
819 [ '[[File:Foobar.jpg]]' ],
822 // should give precedence to modified data-mw over original (captions 3) /////////////
823 $dataMediaWikiModified = [
831 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
833 'body' => $dataMediaWikiModified,
837 'body' => $dataParsoid,
839 'data-mw' => [ // original data
840 'body' => $dataMediaWiki,
843 'headers' => $htmlHeaders999,
849 yield
'should give precedence to modified data-mw over original (captions 3)' => [
851 $html, // modified HTML
852 [ '[[File:Foobar.jpg]]' ],
855 // should apply extra normalizations ///////////////////
856 $htmlModified = 'Foo<h2></h2>Bar';
862 yield
'should apply extra normalizations' => [
864 $htmlModified, // modified HTML
865 [ 'FooBar' ], // empty tag was stripped
868 // should apply version downgrade ///////////
869 $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0
872 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
876 // Specify newer profile version for original HTML
877 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"'
879 // The profile version given inline in the original HTML doesn't matter, it's ignored
880 'body' => $htmlOfMinimal,
882 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ],
883 'data-mw' => [ 'body' => [ 'ids' => [] ] ], // required by version 999.0.0
887 yield
'should apply version downgrade' => [
893 // should not apply version downgrade if versions are the same ///////////
894 $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0
897 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
901 // Specify exactly the same version that is given inline in Minimal.html (2.4.0)
902 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"'
904 // The profile version given inline in the original HTML doesn't matter, it's ignored
905 'body' => $htmlOfMinimal,
907 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ],
911 yield
'should not apply version downgrade if versions are the same' => [
917 // should convert html to json ///////////////////////////////////
918 $html = $this->getTextFromFile( 'JsonConfig.html' );
925 // even if the path says "wikitext", the contentmodel from the body should win.
926 'format' => ParsoidFormatHelper
::FORMAT_WIKITEXT
,
927 'contentmodel' => CONTENT_MODEL_JSON
,
930 yield
'should convert html to json' => [
934 [ 'content-type' => 'application/json' ],
937 // page bundle input should work with no original data present ///////////
938 $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0
941 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
945 yield
'page bundle input should work with no original data present' => [
952 private function makePage( $title, $wikitext ): RevisionRecord
{
953 $title = new TitleValue( NS_MAIN
, $title );
954 $rev = $this->getServiceContainer()->getRevisionLookup()->getRevisionByTitle( $title );
960 /** @var RevisionRecord $rev */
961 [ 'revision-record' => $rev ] = $this->editPage( 'Test_html2wt', $wikitext )->getValue();
967 * @dataProvider provideHtml2wt
969 * @param array $attribs
970 * @param string $html
971 * @param string[] $expectedText
972 * @param string[] $expectedHeaders
974 * @covers MediaWiki\Parser\Parsoid\HtmlToContentTransform
975 * @covers MediaWiki\Rest\Handler\ParsoidHandler::html2wt
977 public function testHtml2wt(
981 array $expectedHeaders = []
983 $wikitextProfileUri = 'https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0';
984 $expectedHeaders +
= [
985 'content-type' => "text/plain; charset=utf-8; profile=\"$wikitextProfileUri\"",
988 $wikitext = self
::IMPERFECT_WIKITEXT
;
990 $rev = $this->makePage( 'Test_html2wt', $wikitext );
991 $page = $rev->getPage();
993 $pageConfig = $this->getPageConfig( $page );
995 $attribs +
= self
::DEFAULT_ATTRIBS
;
996 $attribs['opts'] +
= self
::DEFAULT_ATTRIBS
['opts'];
997 $attribs['opts']['from'] ??
= 'html';
998 $attribs['envOptions'] +
= self
::DEFAULT_ATTRIBS
['envOptions'];
1000 if ( $attribs['oldid'] ) {
1001 // Set the actual ID of an existing revision
1002 $attribs['oldid'] = $rev->getId();
1005 $handler = $this->newParsoidHandler();
1007 $response = $handler->html2wt( $pageConfig, $attribs, $html );
1008 $body = $response->getBody();
1010 $wikitext = $body->getContents();
1012 foreach ( $expectedHeaders as $name => $value ) {
1013 $this->assertSame( $value, $response->getHeaderLine( $name ) );
1016 foreach ( (array)$expectedText as $exp ) {
1017 $this->assertStringContainsString( $exp, $wikitext );
1021 public function provideHtml2wtThrows() {
1022 $html = '<html lang="en"><body>123</body></html>';
1024 $profileVersion = '2.4.0';
1025 $htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
1026 $htmlContentType = "text/html;profile=\"$htmlProfileUri\"";
1028 'content-type' => $htmlContentType,
1031 // XXX: what does version 999.0.0 mean?!
1032 $htmlContentType999 = 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"';
1034 'content-type' => $htmlContentType999,
1037 // Content-type of original html is missing ////////////////////////////
1042 // no headers with content type
1048 yield
'Content-type of original html is missing' => [
1051 new LocalizedHttpException(
1052 new MessageValue( 'rest-html-backend-error' ),
1054 [ 'reason' => 'Content-type of original html is missing.' ]
1058 // should fail to downgrade the original version for an unknown transition ///////////
1059 $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' );
1060 $htmlOfMinimal2222 = $this->getTextFromFile( 'Minimal-2222.html' );
1063 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1067 // Specify version 2222.0.0!
1068 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2222.0.0"'
1070 'body' => $htmlOfMinimal2222,
1072 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ],
1076 yield
'should fail to downgrade the original version for an unknown transition' => [
1079 new LocalizedHttpException(
1080 new MessageValue( 'rest-html-backend-error' ),
1082 [ 'reason' => 'No downgrade possible from schema version 2222.0.0 to 2.4.0.' ]
1086 // DSR offsetType mismatch: UCS2 vs byte ///////////////////////////////
1088 'offsetType' => 'byte',
1090 'offsetType' => 'byte',
1093 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1096 'headers' => $htmlHeaders,
1101 'offsetType' => 'UCS2',
1108 yield
'DSR offsetType mismatch: UCS2 vs byte' => [
1111 new LocalizedHttpException(
1112 new MessageValue( 'rest-html-backend-error' ),
1114 [ 'reason' => 'DSR offsetType mismatch: UCS2 vs byte' ]
1118 // DSR offsetType mismatch: byte vs UCS2 ///////////////////////////////
1120 'offsetType' => 'UCS2',
1122 'offsetType' => 'UCS2',
1126 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1129 'headers' => $htmlHeaders,
1134 'offsetType' => 'byte',
1141 yield
'DSR offsetType mismatch: byte vs UCS2' => [
1144 new LocalizedHttpException(
1145 new MessageValue( 'rest-html-backend-error' ),
1147 [ 'reason' => 'DSR offsetType mismatch: byte vs UCS2' ]
1151 // Could not find previous revision ////////////////////////////
1153 'oldid' => 1155779922,
1155 // set original HTML to enable selser
1158 'headers' => $htmlHeaders,
1164 yield
'Could not find previous revision' => [
1168 'The specified revision is deleted or suppressed.',
1173 // should return a 400 for missing inline data-mw (2.x) ///////////////////
1174 $html = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">hi</p>';
1175 $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ];
1176 $htmlOrig = '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">ho</p>';
1179 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1182 'body' => $dataParsoid,
1185 'headers' => $htmlHeaders,
1186 // slightly modified
1187 'body' => $htmlOrig,
1192 yield
'should return a 400 for missing inline data-mw (2.x)' => [
1196 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src',
1201 // should return a 400 for not supplying data-mw //////////////////////
1204 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1207 'body' => $dataParsoid,
1210 'headers' => $htmlHeaders999,
1211 'body' => $htmlOrig,
1216 yield
'should return a 400 for not supplying data-mw' => [
1219 new LocalizedHttpException(
1220 new MessageValue( 'rest-html-backend-error' ),
1222 [ 'reason' => 'Invalid data-mw was provided.' ]
1226 // should return a 400 for missing modified data-mw
1229 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1232 'body' => $dataParsoid,
1236 // Missing data-mw.parts!
1237 'ids' => [ 'mwAQ' => [] ],
1241 'headers' => $htmlHeaders999,
1242 'body' => $htmlOrig,
1247 yield
'should return a 400 for missing modified data-mw' => [
1251 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src',
1256 // should return http 400 if supplied data-parsoid is empty ////////////
1257 $html = '<html><head></head><body><p>hi</p></body></html>';
1258 $htmlOrig = '<html><head></head><body><p>ho</p></body></html>';
1261 'from' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
,
1267 'headers' => $htmlHeaders,
1268 'body' => $htmlOrig,
1273 yield
'should return http 400 if supplied data-parsoid is empty' => [
1276 new LocalizedHttpException(
1277 new MessageValue( 'rest-html-backend-error' ),
1279 [ 'reason' => 'Invalid data-parsoid was provided.' ]
1283 // TODO: ResourceLimitExceededException from $parsoid->dom2wikitext -> 413
1284 // TODO: ClientError from $parsoid->dom2wikitext -> 400
1285 // TODO: Errors from PageBundle->validate
1289 * @dataProvider provideHtml2wtThrows
1291 * @param array $attribs
1292 * @param string $html
1293 * @param Exception $expectedException
1295 public function testHtml2wtThrows(
1298 Exception
$expectedException
1300 if ( isset( $attribs['oldid'] ) ) {
1301 // If a specific revision ID is requested, it's almost certain to not exist.
1302 // So we are testing with a non-existing page.
1303 $page = $this->getNonexistingTestPage();
1305 $page = $this->getExistingTestPage();
1308 $pageConfig = $this->getPageConfig( $page );
1310 $attribs +
= self
::DEFAULT_ATTRIBS
;
1311 $attribs['opts'] +
= self
::DEFAULT_ATTRIBS
['opts'];
1312 $attribs['opts']['from'] ??
= 'html';
1313 $attribs['envOptions'] +
= self
::DEFAULT_ATTRIBS
['envOptions'];
1315 $handler = $this->newParsoidHandler();
1318 $handler->html2wt( $pageConfig, $attribs, $html );
1319 $this->fail( 'Expected exception: ' . $expectedException );
1320 } catch ( Exception
$e ) {
1321 $this->assertInstanceOf( get_class( $expectedException ), $e );
1322 $this->assertSame( $expectedException->getCode(), $e->getCode() );
1324 if ( $expectedException instanceof HttpException
) {
1325 /** @var HttpException $e */
1326 $this->assertSame( $expectedException->getErrorData(), $e->getErrorData() );
1329 $this->assertSame( $expectedException->getMessage(), $e->getMessage() );
1333 public static function provideDom2wikitextException() {
1334 yield
'ClientError' => [
1335 new ClientError( 'test' ),
1336 new HttpException( 'test', 400 )
1339 yield
'ResourceLimitExceededException' => [
1340 new ResourceLimitExceededException( 'test' ),
1341 new HttpException( 'test', 413 )
1346 * @dataProvider provideDom2wikitextException
1348 * @param Exception $throw
1349 * @param Exception $expectedException
1351 public function testHtml2wtHandlesDom2wikitextException(
1353 Exception
$expectedException
1355 $html = '<p>hi</p>';
1356 $page = $this->getExistingTestPage();
1359 'from' => ParsoidFormatHelper
::FORMAT_HTML
1361 ] + self
::DEFAULT_ATTRIBS
;
1363 // Make a fake Parsoid that throws
1364 /** @var Parsoid|MockObject $parsoid */
1365 $parsoid = $this->createNoOpMock( Parsoid
::class, [ 'dom2wikitext' ] );
1366 $parsoid->method( 'dom2wikitext' )->willThrowException( $throw );
1368 // Make a fake HtmlTransformFactory that returns an HtmlToContentTransform that uses the fake Parsoid.
1369 /** @var HtmlTransformFactory|MockObject $factory */
1370 $factory = $this->createNoOpMock( HtmlTransformFactory
::class, [ 'getHtmlToContentTransform' ] );
1371 $factory->method( 'getHtmlToContentTransform' )->willReturn( new HtmlToContentTransform(
1376 $this->getPageConfigFactory( $page ),
1377 $this->getServiceContainer()->getContentHandlerFactory()
1380 // Use an HtmlInputTransformHelper that uses the fake HtmlTransformFactory, so it ends up
1381 // using the HtmlToContentTransform that has the fake Parsoid which throws an exception.
1382 $handler = $this->newParsoidHandler( [
1383 'getHtmlInputHelper' => function () use ( $factory, $page, $html ) {
1384 $helper = new HtmlInputTransformHelper(
1385 new NullStatsdDataFactory(),
1387 $this->getServiceContainer()->getParsoidOutputStash(),
1388 $this->getServiceContainer()->getParsoidOutputAccess()
1391 $helper->init( $page, [ 'html' => $html ], [] );
1396 // Check that the exception thrown by Parsoid gets converted as expected.
1397 $this->expectException( get_class( $expectedException ) );
1398 $this->expectExceptionCode( $expectedException->getCode() );
1399 $this->expectExceptionMessage( $expectedException->getMessage() );
1401 $handler->html2wt( $page, $attribs, $html );
1404 /** @return Generator */
1405 public function provideTryToCreatePageConfigData() {
// Named cases for testTryToCreatePageConfig. Each case carries the request
// attribs, optionally some wikitext, the html2WtMode flag and the language
// the resulting page config is expected to report.
// The language objects come from createLanguageMock().
1406 $en = $this->createLanguageMock( 'en' );
1407 $ar = $this->createLanguageMock( 'ar' );
1408 $de = $this->createLanguageMock( 'de' );
1409 yield
'Default attribs for tryToCreatePageConfig()' => [
1410 'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $en ],
1412 'html2WtMode' => false,
1413 'expectedPageLanguage' => $en,
1416 yield
'tryToCreatePageConfig with wikitext' => [
1417 'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $en ],
1418 'wikitext' => "=test=",
1419 'html2WtMode' => false,
1420 'expected page language' => $en,
// A null 'pagelanguage' is expected to resolve to $en.
1423 yield
'tryToCreatePageConfig with html2WtMode set to true' => [
1424 'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => null ],
1426 'html2WtMode' => true,
1427 'expected page language' => $en,
1430 yield
'tryToCreatePageConfig with both wikitext and html2WtMode' => [
1431 'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $ar ],
1432 'wikitext' => "=header=",
1433 'html2WtMode' => true,
1434 'expected page language' => $ar,
1437 yield
'Try to create a page config with pageName set to empty string' => [
1438 'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => $de ],
1440 'html2WtMode' => false,
1441 'expected page language' => $de,
// Empty page name and null 'pagelanguage': the expected language is again $en.
1444 yield
'Try to create a page config with no page language' => [
1445 'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => null ],
1448 'expected page language' => $en,
1453 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
1455 * @dataProvider provideTryToCreatePageConfigData
1457 public function testTryToCreatePageConfig(
1461 Language
$expectedLanguage
1463 // Create a page, if needed, to test with oldid
1464 $origContent = 'Test content for ' . __METHOD__
;
1465 $page = $this->getNonexistingTestPage();
1466 $this->editPage( $page, $origContent );
// When the case supplies no wikitext, the expected text falls back to the
// content that was just saved to the page.
1467 $expectedWikitext = $wikitext ??
$origContent;
1468 $pageConfig = $this->newParsoidHandler()->tryToCreatePageConfig( $attribs, $wikitext, $html2WtMode );
1472 $pageConfig->getRevisionContent()->getContent( SlotRecord
::MAIN
)
// Languages are compared by their code, not by object identity.
1475 $this->assertSame( $expectedLanguage->getCode(), $pageConfig->getPageLanguageBcp47()->getCode() );
1478 /** @return Generator */
1479 public function provideTryToCreatePageConfigDataThrows() {
// Inputs for testTryToCreatePageConfigThrows, which expects an HttpException
// with code 404 for each of them.
1480 $en = $this->createLanguageMock( 'en' );
1481 yield
"PageConfig with oldid that doesn't exist" => [
1482 'attribs' => [ 'oldid' => null, 'pageName' => 'Test', 'pagelanguage' => $en ],
1484 'html2WtMode' => false,
1487 yield
'PageConfig with a bad title' => [
1488 [ 'oldid' => null, 'pageName' => 'Special:Badtitle', 'pagelanguage' => $en ],
1490 'html2WtMode' => false,
1493 yield
"PageConfig with a revision that doesn't exist" => [
1494 // The 'oldid' is deliberately large so that it refers to a revision
1495 // that doesn't exist.
1496 [ 'oldid' => 12345678, 'pageName' => 'Test', 'pagelanguage' => $en ],
1498 'html2WtMode' => false,
1503 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
1505 * @dataProvider provideTryToCreatePageConfigDataThrows
1507 public function testTryToCreatePageConfigThrows( array $attribs, $wikitext, $html2WtMode ) {
// The expectations are registered first; the call below must then throw an
// HttpException with code 404.
1508 $this->expectException( HttpException
::class );
1509 $this->expectExceptionCode( 404 );
1511 $this->newParsoidHandler()->tryToCreatePageConfig( $attribs, $wikitext, $html2WtMode );
1514 public static function provideRoundTripNoSelser() {
// Wikitext samples used as @dataProvider input by the testRoundTrip* methods.
1515 yield
'space in heading' => [
1516 "==foo==\nsomething\n"
1520 public static function provideRoundTripNeedingSelser() {
// Additional samples; listed as a second @dataProvider on
// testRoundTripWithSelser and testRoundTripWithStashing only.
1521 yield
'uppercase tags' => [
1527 * @dataProvider provideRoundTripNoSelser
1529 public function testRoundTripWithHTML( $wikitext ) {
// Converts $wikitext to HTML with wt2html(), passes that HTML to html2wt(),
// and expects the trimmed result to equal the trimmed input.
1530 $handler = $this->newParsoidHandler();
1532 $attribs = self
::DEFAULT_ATTRIBS
;
1533 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1534 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_HTML
;
1536 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext );
1537 $response = $handler->wt2html( $pageConfig, $attribs, $wikitext );
1538 $body = $response->getBody();
1540 $html = $body->getContents();
1542 // Got HTML, now convert back
1543 $attribs = self
::DEFAULT_ATTRIBS
;
1544 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_HTML
;
1545 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1547 $pageConfig = $handler->tryToCreatePageConfig( $attribs, null, true );
1548 $response = $handler->html2wt( $pageConfig, $attribs, $html );
1549 $body = $response->getBody();
1551 $actual = $body->getContents();
1553 // Strip leading/trailing whitespace on both sides before comparing
1554 $actual = trim( $actual );
1555 $wikitext = trim( $wikitext );
1557 $this->assertSame( $wikitext, $actual );
1561 * @dataProvider provideRoundTripNoSelser
1563 public function testRoundTripWithPageBundleWithoutOriginalHTML( $wikitext ) {
// Requests a pagebundle from wt2html(), then calls html2wt() with the bundle's
// HTML body and the bundle's data-parsoid as 'original', and expects the
// trimmed wikitext back.
1564 $handler = $this->newParsoidHandler();
1566 $attribs = self
::DEFAULT_ATTRIBS
;
1567 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1568 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
;
1570 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext );
1571 $response = $handler->wt2html( $pageConfig, $attribs, $wikitext );
1572 $body = $response->getBody();
1574 $pbJson = $body->getContents();
// NOTE(review): JSON_OBJECT_AS_ARRAY is a flags constant passed in the
// $associative position; it is truthy, so arrays are returned. This relies on
// non-strict scalar coercion - confirm the file does not declare strict_types.
1576 $pbData = json_decode( $pbJson, JSON_OBJECT_AS_ARRAY
);
1577 $html = $pbData['html']['body']; // HTML with data-parsoid stripped out
1579 // Got HTML, now convert back
1580 $attribs = self
::DEFAULT_ATTRIBS
;
1581 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
;
1582 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1583 $attribs['opts']['original'] = [
1584 'data-parsoid' => $pbData['data-parsoid'],
1587 $pageConfig = $handler->tryToCreatePageConfig( $attribs, null, true );
1588 $response = $handler->html2wt( $pageConfig, $attribs, $html );
1589 $body = $response->getBody();
1591 $actual = $body->getContents();
1593 // Strip leading/trailing whitespace on both sides before comparing
1594 $actual = trim( $actual );
1595 $wikitext = trim( $wikitext );
1597 $this->assertSame( $wikitext, $actual );
1601 * @dataProvider provideRoundTripNoSelser
1602 * @dataProvider provideRoundTripNeedingSelser
1604 public function testRoundTripWithSelser( $wikitext ) {
// Round trip in which html2wt() additionally receives the full original
// pagebundle, the original wikitext and the id of an existing revision.
1605 $handler = $this->newParsoidHandler();
1607 $attribs = self
::DEFAULT_ATTRIBS
;
1608 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1609 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
;
1611 $page = $this->getExistingTestPage();
1612 $revid = $page->getLatest();
1614 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext );
1615 $response = $handler->wt2html( $pageConfig, $attribs, $wikitext );
1617 // NOTE: Make sure there is no ETag if no stashing was requested (T331629)
1618 $etag = $response->getHeaderLine( 'etag' );
1619 $this->assertSame( '', $etag, 'ETag' );
1621 $body = $response->getBody();
1623 $pbJson = $body->getContents();
// NOTE(review): JSON_OBJECT_AS_ARRAY sits in the $associative position; it is
// truthy, so arrays are returned - confirm strict_types is not declared.
1625 $pbData = json_decode( $pbJson, JSON_OBJECT_AS_ARRAY
);
1626 $html = $pbData['html']['body']; // HTML with data-parsoid stripped out
1628 // Got HTML, now convert back
1629 $attribs = self
::DEFAULT_ATTRIBS
;
1630 $attribs['oldid'] = $revid;
1631 $attribs['opts']['revid'] = $revid;
1632 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
;
1633 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
// The whole decoded bundle is used as 'original', with the source wikitext
// added under ['wikitext']['body'].
1634 $attribs['opts']['original'] = $pbData;
1635 $attribs['opts']['original']['wikitext']['body'] = $wikitext;
1637 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext, true );
1638 $response = $handler->html2wt( $pageConfig, $attribs, $html );
1639 $body = $response->getBody();
1641 $actual = $body->getContents();
1643 // Strip leading/trailing whitespace on both sides before comparing
1644 $actual = trim( $actual );
1645 $wikitext = trim( $wikitext );
1647 $this->assertSame( $wikitext, $actual );
1651 * @dataProvider provideRoundTripNoSelser
1652 * @dataProvider provideRoundTripNeedingSelser
1654 public function testRoundTripWithStashing( $wikitext ) {
// wt2html() is called with 'stash' => true and must answer with a non-empty
// ETag; that ETag is then handed to html2wt() as
// $attribs['opts']['original']['etag'].
1655 $handler = $this->newParsoidHandler();
1657 $attribs = self
::DEFAULT_ATTRIBS
;
1658 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1659 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_HTML
;
1660 $attribs['opts']['stash'] = true;
1662 $page = $this->getExistingTestPage();
1663 $revid = $page->getLatest();
1665 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext );
1666 $response = $handler->wt2html( $pageConfig, $attribs, $wikitext );
// Stashing was requested, so an ETag header is required here.
1668 $etag = $response->getHeaderLine( 'etag' );
1669 $this->assertNotEmpty( $etag, 'ETag' );
1671 $body = $response->getBody();
1673 $html = $body->getContents();
1675 // Got HTML, now convert back
1676 $attribs = self
::DEFAULT_ATTRIBS
;
1677 $attribs['oldid'] = $revid;
1678 $attribs['opts']['revid'] = $revid;
1679 $attribs['opts']['from'] = ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
;
1680 $attribs['opts']['format'] = ParsoidFormatHelper
::FORMAT_WIKITEXT
;
1681 $attribs['opts']['original']['etag'] = $etag;
1682 $attribs['opts']['original']['wikitext'] = $wikitext;
1684 $pageConfig = $handler->tryToCreatePageConfig( $attribs, $wikitext, true );
1685 $response = $handler->html2wt( $pageConfig, $attribs, $html );
1686 $body = $response->getBody();
1688 $actual = $body->getContents();
1690 // Strip leading/trailing whitespace on both sides before comparing
1691 $actual = trim( $actual );
1692 $wikitext = trim( $wikitext );
1694 $this->assertSame( $wikitext, $actual );
1697 public function provideLanguageConversion() {
// Data for testLanguageConversion: page language 'en', conversion target
// 'en-x-piglatin'. The expected HTML content type is built from the first
// entry of Parsoid::AVAILABLE_VERSIONS.
1698 $en = $this->createLanguageMock( 'en' );
1699 $enPigLatin = $this->createLanguageMock( 'en-x-piglatin' );
1700 $profileVersion = Parsoid
::AVAILABLE_VERSIONS
[0];
1701 $htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
1702 $htmlContentType = "text/html; charset=utf-8; profile=\"$htmlProfileUri\"";
1706 'pageName' => __METHOD__
,
1709 'inputContentVersion' => Parsoid
::defaultHTMLVersion()
1714 'pagelanguage' => $en,
1719 'target' => $enPigLatin
1723 ] +
$defaultAttribs;
1726 'contentmodel' => CONTENT_MODEL_WIKITEXT
,
1729 'content-type' => $htmlContentType,
1731 'body' => '<p>test language conversion</p>',
// Pig Latin rendering of the input body above.
1738 '>esttay anguagelay onversioncay<',
1740 'content-type' => $htmlContentType,
1741 'content-language' => $enPigLatin->toBcp47Code(),
1747 * @dataProvider provideLanguageConversion
1749 public function testLanguageConversion(
1752 string $expectedText,
1753 array $expectedHeaders
1755 $handler = $this->newParsoidHandler();
1757 $pageConfig = $handler->tryToCreatePageConfig( $attribs, null, true );
1758 $response = $handler->languageConversion( $pageConfig, $attribs, $revision );
1760 $body = $response->getBody();
1762 $actual = $body->getContents();
// The response body is decoded as JSON; the converted HTML and its headers
// are looked up under the 'html' key.
1764 $pb = json_decode( $actual, true );
1765 $this->assertNotEmpty( $pb );
1766 $this->assertArrayHasKey( 'html', $pb );
1767 $this->assertArrayHasKey( 'body', $pb['html'] );
1769 $this->assertStringContainsString( $expectedText, $pb['html']['body'] );
// Every expected header must be present in the bundle with exactly this value.
1771 foreach ( $expectedHeaders as $key => $value ) {
1772 $this->assertArrayHasKey( $key, $pb['html']['headers'] );
1773 $this->assertSame( $value, $pb['html']['headers'][$key] );
1777 public static function provideWt2html() {
// Cases for testWt2html. Each section below prepares its inputs and
// expectations and then yields one named case.
// NOTE(review): the profile version is hard-coded here, whereas
// provideLanguageConversion() reads Parsoid::AVAILABLE_VERSIONS[0] - confirm
// this is intentional.
1778 $profileVersion = '2.6.0';
1779 $htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
1780 $pbProfileUri = 'https://www.mediawiki.org/wiki/Specs/pagebundle/' . $profileVersion;
1781 $dpProfileUri = 'https://www.mediawiki.org/wiki/Specs/data-parsoid/' . $profileVersion;
1783 $htmlContentType = "text/html; charset=utf-8; profile=\"$htmlProfileUri\"";
1784 $pbContentType = "application/json; charset=utf-8; profile=\"$pbProfileUri\"";
1785 $dpContentType = "application/json; charset=utf-8; profile=\"$dpProfileUri\"";
1786 $lintContentType = "application/json";
1789 'content-type' => $htmlContentType,
1793 'content-type' => $pbContentType,
1797 'content-type' => $lintContentType,
1800 // should get from a title and revision (html) ///////////////////////////////////
1802 '>First Revision Content<',
1803 '<html', // full document
1804 'data-parsoid=' // annotated
1807 $unexpectedText = [];
1810 'oldid' => 1, // will be replaced by a real revision id
1812 yield
'should get from a title and revision (html)' => [
1820 // should get from a title and revision (pagebundle) ///////////////////////////////////
1821 $expectedText = [ // bits of json
1822 '"body":"<!DOCTYPE html>',
1823 'First Revision Content</p>',
1824 'contentmodel' => 'wikitext',
1827 'content-type' => $dpContentType,
1831 'ids' => [ // NOTE: match "First Revision Content"
1832 'mwAA' => [ 'dsr' => [ 0, 22, 0, 0 ] ],
1834 'mwAg' => [ 'dsr' => [ 0, 22, 0, 0 ] ],
1836 'offsetType' => 'ucs2', // as provided in the input
1841 $unexpectedText = [];
1844 'oldid' => 1, // will be replaced by a real revision id
1845 'opts' => [ 'format' => ParsoidFormatHelper
::FORMAT_PAGEBUNDLE
],
1846 // Ensure this is ucs2 so we have a ucs2 offsetType test since
1847 // Parsoid's rt-testing script is node.js based and hence needs
1848 // ucs2 offsets to function correctly!
1849 'offsetType' => 'ucs2', // make sure this is looped through to data-parsoid attribute
1851 yield
'should get from a title and revision (pagebundle)' => [
1859 // should parse the given wikitext ///////////////////////////////////
1860 $wikitext = 'lorem ipsum';
1863 '<html', // full document
1864 'data-parsoid=' // annotated
1867 $unexpectedText = [];
1870 yield
'should parse the given wikitext' => [
1878 // should parse the given wikitext (body_only) ///////////////////////////////////
1879 $wikitext = 'lorem ipsum';
1880 $expectedText = [ '>lorem ipsum<' ];
1882 $unexpectedText = [ '<html' ];
1887 yield
'should parse the given wikitext (body_only)' => [
1895 // should lint the given wikitext ///////////////////////////////////
1896 $wikitext = "{|\nhi\n|ho\n|}";
1898 '"type":"fostered"',
1907 'opts' => [ 'format' => ParsoidFormatHelper
::FORMAT_LINT
]
1910 yield
'should lint the given wikitext' => [
1918 // should parse the given JSON ///////////////////////////////////
1919 $wikitext = '{ "color": "green" }';
1921 // should be rendered as table, not interpreted as wikitext
1928 $unexpectedText = [ '<p>' ];
1932 'contentmodel' => CONTENT_MODEL_JSON
,
1935 yield
'should parse the given JSON' => [
1945 * @dataProvider provideWt2html
1947 * @param array $attribs
1948 * @param string|null $text
1949 * @param array $expectedData
1950 * @param string[] $unexpectedHtml
1951 * @param string[] $expectedHeaders
1953 public function testWt2html(
1956 array $expectedData,
1957 array $unexpectedHtml,
1958 array $expectedHeaders = []
// Default header expectation; the array union only applies when the data
// provider did not set 'content-type' itself.
// NOTE(review): this default pairs text/x-wiki with the HTML profile URI and
// spells 'html' in lower case, unlike provideWt2html() - confirm it is
// intended and that contentTypeMatcher() tolerates it.
1960 $htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
1961 $expectedHeaders +
= [
1962 'content-type' => "text/x-wiki; charset=utf-8; profile=\"$htmlProfileUri\"",
1965 $page = $this->getNonexistingTestPage( __METHOD__
);
1966 $status = $this->editPage( $page, 'First Revision Content' );
1967 $currentRev = $status->getNewRevision();
// Fill in every attribute the case left unset from DEFAULT_ATTRIBS; values
// supplied by the case win.
1969 $attribs +
= self
::DEFAULT_ATTRIBS
;
1970 $attribs['opts'] +
= self
::DEFAULT_ATTRIBS
['opts'];
1971 $attribs['opts']['from'] ??
= 'wikitext';
1972 $attribs['opts']['format'] ??
= 'html';
1973 $attribs['envOptions'] +
= self
::DEFAULT_ATTRIBS
['envOptions'];
1975 if ( $attribs['oldid'] ) {
1976 // Set the actual ID of an existing revision
1977 $attribs['oldid'] = $currentRev->getId();
1979 // Make sure we are testing against a non-current revision
1980 $this->editPage( $page, 'this is not the content you are looking for' );
1983 $handler = $this->newParsoidHandler();
// Explicit text takes precedence over the revision id when building the
// page config.
1985 $revTextOrId = $text ??
$attribs['oldid'] ??
null;
1986 $pageConfig = $this->getPageConfig( $page, $revTextOrId );
1987 $response = $handler->wt2html( $pageConfig, $attribs, $text );
1988 $body = $response->getBody();
1990 $data = $body->getContents();
// content-type is checked through contentTypeMatcher(); all other headers
// must match exactly.
1992 foreach ( $expectedHeaders as $name => $value ) {
1993 $responseHeaderValue = $response->getHeaderLine( $name );
1994 if ( $name === 'content-type' ) {
1995 $this->assertTrue( $this->contentTypeMatcher( $value, $responseHeaderValue ) );
1997 $this->assertSame( $value, $responseHeaderValue );
2001 // HACK: try to parse as json, just in case:
// NOTE(review): JSON_OBJECT_AS_ARRAY sits in the $associative position; it is
// truthy, so arrays are returned - confirm strict_types is not declared.
2002 $jsonData = json_decode( $data, JSON_OBJECT_AS_ARRAY
);
// Integer keys hold substrings expected in the raw body; string keys name
// entries of the decoded JSON.
2004 foreach ( $expectedData as $index => $exp ) {
2005 if ( is_int( $index ) ) {
2006 $this->assertStringContainsString( $exp, $data );
2008 $this->assertArrayHasKey( $index, $jsonData );
2009 if ( $index === 'data-parsoid' ) {
2010 // FIXME: Assert headers as well
2011 $this->assertArrayHasKey( 'body', $jsonData[$index] );
2012 $this->assertSame( $exp['body'], $jsonData[$index]['body'] );
2014 $this->assertSame( $exp, $jsonData[$index] );
2019 foreach ( $unexpectedHtml as $exp ) {
2020 $this->assertStringNotContainsString( $exp, $data );
2024 public function testLenientRevisionHandling() {
// Creates Page1 (plain content) and Page2 (a redirect to Page1), then calls
// wt2html() for matching and for mismatched page/revision pairs. For the
// mismatched pair the parser cache must never be written.
2025 $page1 = $this->getNonexistingTestPage( "Page1" );
2026 $status = $this->editPage( $page1, 'Page 1 revision content' );
2027 $rev1 = $status->getNewRevision();
2029 $page2 = $this->getNonexistingTestPage( "Page2" );
2030 $status = $this->editPage( $page2, '#REDIRECT [[Page1]]' );
2031 $rev2 = $status->getNewRevision();
2033 $handler = $this->newParsoidHandler();
2035 // Test 1: <page1, rev1>
2036 $attribs = self
::DEFAULT_ATTRIBS
;
2037 $attribs['opts'] +
= self
::DEFAULT_ATTRIBS
['opts'];
2038 $attribs['opts']['from'] ??
= 'wikitext';
2039 $attribs['opts']['format'] ??
= 'html';
2040 $attribs['envOptions'] +
= self
::DEFAULT_ATTRIBS
['envOptions'];
2041 $attribs['oldid'] = $rev1->getId();
2043 $pageConfig = $this->getPageConfig( $page1, $attribs['oldid'] );
2044 $response = $handler->wt2html( $pageConfig, $attribs );
2045 $body = $response->getBody();
2047 $data = $body->getContents();
2048 $this->assertStringContainsString( 'Page 1 revision content', $data );
2050 // Test 2: <page2, rev2>
2051 $attribs['oldid'] = $rev2->getId();
2052 $pageConfig = $this->getPageConfig( $page2, $attribs['oldid'] );
2053 $response = $handler->wt2html( $pageConfig, $attribs );
2054 $body = $response->getBody();
2056 $data = $body->getContents();
2057 $this->assertStringContainsString( '<link rel="mw:PageProp/redirect" ', $data );
2059 // Test 3: <page2, rev1> <-- should transparently redirect
2060 $attribs['oldid'] = $rev1->getId();
2061 $pageConfig = $this->getPageConfig( $page2, $attribs['oldid'] );
2062 $response = $handler->wt2html( $pageConfig, $attribs );
2063 $body = $response->getBody();
2065 $data = $body->getContents();
2066 $this->assertStringContainsString( 'Page 1 revision content', $data );
2068 // Test 3 repeated with ParserCache to ensure nothing is written to cache!
2069 $parserCache = $this->createNoOpMock( ParserCache
::class, [ 'save', 'get', 'makeParserOutputKey', 'getMetadata' ] );
2070 // This is the critical assertion -- no cache saves for mismatched rev & page params
2071 $parserCache->expects( $this->never() )->method( 'save' );
2072 // Ensures there is a cache miss
2073 $parserCache->method( 'get' )->willReturn( false );
2074 // Verify that the cache is queried
2075 $parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
2076 $parserCache->expects( $this->atLeastOnce() )->method( 'getMetadata' );
2077 $parserCacheFactory = $this->createNoOpMock(
2078 ParserCacheFactory
::class,
2079 [ 'getParserCache', 'getRevisionOutputCache' ]
2081 $parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
2082 $parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn(
2083 $this->createNoOpMock( RevisionOutputCache
::class )
// A new handler is created after the mocked factory has been installed as
// the 'ParserCacheFactory' service.
2085 $this->setService( 'ParserCacheFactory', $parserCacheFactory );
2086 $handler = $this->newParsoidHandler();
2087 $handler->wt2html( $pageConfig, $attribs ); // Reuse pageconfig & attribs from test 3
2090 public function testWt2html_ParserCache() {
// Calls wt2html() twice against a mocked ParserCache: once with default
// configuration and once with TemporaryParsoidHandlerParserCacheWriteRatio
// set to 0. Across both calls save() must be invoked exactly once.
2091 $page = $this->getExistingTestPage();
2092 $pageConfig = $this->getPageConfig( $page );
2094 $parserCache = $this->createNoOpMock( ParserCache
::class, [ 'save', 'get', 'makeParserOutputKey', 'getMetadata' ] );
2096 // This is the critical assertion in this test case: the save() method should
2097 // be called exactly once!
2098 $parserCache->expects( $this->once() )->method( 'save' );
// get() always reports a cache miss.
2099 $parserCache->method( 'get' )->willReturn( false );
2100 // These methods will be called by ParserOutputAccess
2101 $parserCache->expects( $this->atLeastOnce() )->method( 'makeParserOutputKey' );
2102 $parserCache->expects( $this->atLeastOnce() )->method( 'getMetadata' );
2104 $parserCacheFactory = $this->createNoOpMock(
2105 ParserCacheFactory
::class,
2106 [ 'getParserCache', 'getRevisionOutputCache' ]
2108 $parserCacheFactory->method( 'getParserCache' )->willReturn( $parserCache );
2109 $parserCacheFactory->method( 'getRevisionOutputCache' )->willReturn(
2110 $this->createNoOpMock( RevisionOutputCache
::class )
2113 $this->setService( 'ParserCacheFactory', $parserCacheFactory );
2115 $attribs = self
::DEFAULT_ATTRIBS
;
2116 $attribs['opts']['from'] = 'wikitext';
2117 $attribs['opts']['format'] = 'html';
2119 $handler = $this->newParsoidHandler();
2121 // This should trigger a parser cache write, because we didn't set a write-ratio
2122 $handler->wt2html( $pageConfig, $attribs );
2124 $this->overrideConfigValue( 'TemporaryParsoidHandlerParserCacheWriteRatio', 0 );
2126 // This should not trigger a parser cache write, because we set the write-ratio to 0
2127 $handler->wt2html( $pageConfig, $attribs );
2130 public function testWt2html_BadContentModel() {
// The page is saved with JavaScriptContent but requested with 'from' set to
// 'wikitext'. The handler is still expected to answer 200 with a JSON body
// whose HTML contains "Dummy output".
2131 $page = $this->getNonexistingTestPage( __METHOD__
);
2132 $this->editPage( $page, new JavaScriptContent( '"not wikitext"' ) );
2133 $pageConfig = $this->getPageConfig( $page );
2135 $attribs = self
::DEFAULT_ATTRIBS
;
2136 $attribs['opts']['from'] = 'wikitext';
2137 // Asking for a 'pagebundle' here because of T325137.
2138 $attribs['opts']['format'] = 'pagebundle';
2140 $handler = $this->newParsoidHandler();
2141 $response = $handler->wt2html( $pageConfig, $attribs );
2143 $this->assertSame( 200, $response->getStatusCode() );
2145 $body = $response->getBody();
2147 $data = $body->getContents();
// NOTE(review): JSON_OBJECT_AS_ARRAY sits in the $associative position; it is
// truthy, so arrays are returned - confirm strict_types is not declared.
2149 $jsonData = json_decode( $data, JSON_OBJECT_AS_ARRAY
);
2151 $this->assertIsArray( $jsonData );
2152 $this->assertStringContainsString( "Dummy output", $jsonData['html']['body'] );
2155 // TODO: test wt2html failure modes
2156 // TODO: test redlinks
2158 public function createLanguageMock( string $code ) {
2159 // Ensure that we always return the same object for a given code.
2161 if ( !isset( $seen[$code] ) ) {
2162 $langMock = $this->createMock( Language
::class );
2164 ->method( 'getCode' )
2165 ->willReturn( $code );
2166 $bcp47 = LanguageCode
::bcp47( $code );
2168 ->method( 'getHtmlCode' )
2169 ->willReturn( $bcp47 );
2171 ->method( 'toBcp47Code' )
2172 ->willReturn( $bcp47 );
2174 ->method( 'getDir' )
2175 ->willReturn( 'ltr' );
2176 $seen[$code] = $langMock;
2178 return $seen[$code];