Revisions: Style action links in old revision notices as links
[mediawiki.git] / includes / parser / RemexRemoveTagHandler.php
blob403507e5cd4d2fa503afff3da4086ebe4ee887c5
1 <?php
3 namespace MediaWiki\Parser;
5 use Wikimedia\RemexHtml\Tokenizer\Attributes;
6 use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
7 use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
8 use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Sits between the tokenizer and the next token handler. Start and end
 * tags whose lower-cased name is in the allowed set are relayed to the
 * next handler (start tags after their attributes have been sanitized);
 * every other tag is re-emitted as plain text taken from the original
 * source string. Comments are dropped.
 *
 * @internal
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string The original HTML source string (used for fallback text
	 *   when rejecting an HTML tag).
	 */
	private $source;

	/**
	 * @var array<string,true> Set of HTML tags which can be self-closed.
	 */
	private $htmlsingle;

	/**
	 * @var array<string,true> Self-closed tags which are on $htmlsingle
	 *   but not on $htmlsingleonly will be emitted as an empty element.
	 */
	private $htmlsingleonly;

	/**
	 * @var array<string,true> Set of allowed HTML open/close tags.
	 */
	private $htmlelements;

	/**
	 * @var ?callable(Attributes,mixed...):Attributes Callback to mutate or
	 *   sanitize attributes.
	 */
	private $attrCallback;

	/**
	 * @var array Extra arguments to provide to the $attrCallback
	 *   (an empty array when none were given).
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler to relay accepted tokens.
	 * @param string $source Input source string.
	 * @param array $tagData Information about allowed/rejected tags; the
	 *   keys 'htmlsingle', 'htmlsingleonly' and 'htmlelements' are read.
	 * @param ?callable $attrCallback Attribute handler callback.
	 *   The full signature is ?callable(Attributes,mixed...):Attributes
	 * @param ?array $callbackArgs Optional arguments to attribute handler.
	 *   Null is treated as "no extra arguments".
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );
		$this->source = $source;
		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
		$this->htmlelements = $tagData['htmlelements'];
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];
	}

	/**
	 * Comments are intentionally swallowed: nothing is passed on to the
	 * next handler.
	 *
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Don't relay comments.
	}

	/**
	 * Takes attribute names and values for a tag and the tag name and
	 * validates that the tag is allowed to be present.
	 * This DOES NOT validate the attributes, nor does it validate the
	 * tags themselves. This method only handles the special circumstances
	 * where we may want to allow a tag within content but ONLY when it has
	 * specific attributes set.
	 *
	 * @param string $element Tag name; callers in this class pass it
	 *   lower-cased.
	 * @param Attributes $attrs
	 * @return bool False for a <meta> or <link> lacking its required
	 *   attributes, true for everything else.
	 *
	 * @see Sanitizer::validateTag()
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		// Both operands are always strings, so compare strictly.
		if ( $element === 'meta' || $element === 'link' ) {
			$params = $attrs->getValues();
			if ( !isset( $params['itemprop'] ) ) {
				// <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
				return false;
			}
			if ( $element === 'meta' && !isset( $params['content'] ) ) {
				// <meta> must have a content="" for the itemprop
				return false;
			}
			if ( $element === 'link' && !isset( $params['href'] ) ) {
				// <link> must have an associated href=""
				return false;
			}
		}

		return true;
	}

	/**
	 * Handle a start tag from the tokenizer: either relay it to the
	 * next stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$t = strtolower( $name );
		if ( isset( $this->htmlelements[$t] ) ) {
			if ( $this->attrCallback ) {
				$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
			}

			// The tag check looks at the attributes as returned by the
			// callback, before they are sanitized below.
			if ( self::validateTag( $t, $attrs ) ) {
				if ( $selfClose && !( isset( $this->htmlsingle[$t] ) || isset( $this->htmlsingleonly[$t] ) ) ) {
					// Remove the self-closing slash, to be consistent with
					// HTML5 semantics. T134423
					$selfClose = false;
				}

				// Only sanitize attributes for tags that are actually
				// relayed; a rejected tag is emitted from the raw source
				// and never uses them.
				$fixedAttrs = Sanitizer::validateTagAttributes( $attrs->getValues(), $t );
				$attrs = new PlainAttributes( $fixedAttrs );

				if ( $selfClose && !isset( $this->htmlsingleonly[$t] ) ) {
					// Interpret self-closing tags as empty tags even when
					// HTML5 would interpret them as start tags. Such input
					// is commonly seen on Wikimedia wikis with this intention.
					$this->nextHandler->startTag( $name, $attrs, false, $sourceStart, $sourceLength );
					// Zero-length end tag positioned right after the start tag.
					$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
				} else {
					$this->nextHandler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
				}
				return;
			}
		}

		// Emit this as a text node instead.
		$this->nextHandler->characters( $this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength );
	}

	/**
	 * Handle an end tag from the tokenizer: either relay it to the
	 * next stage, or re-emit it as raw text.
	 *
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$t = strtolower( $name );
		if ( isset( $this->htmlelements[$t] ) ) {
			// This is a good tag, relay it.
			$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
		} else {
			// Emit this as a text node instead.
			$this->nextHandler->characters( $this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength );
		}
	}
}