3 namespace MediaWiki\Tidy
;
5 use MediaWiki\Config\ServiceOptions
;
6 use MediaWiki\MainConfigNames
;
7 use Wikimedia\RemexHtml\HTMLData
;
8 use Wikimedia\RemexHtml\Serializer\Serializer
;
9 use Wikimedia\RemexHtml\Serializer\SerializerWithTracer
;
10 use Wikimedia\RemexHtml\Tokenizer\Tokenizer
;
11 use Wikimedia\RemexHtml\TreeBuilder\Dispatcher
;
12 use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder
;
13 use Wikimedia\RemexHtml\TreeBuilder\TreeMutationTracer
;
15 class RemexDriver
extends TidyDriverBase
{
// Copied from $config['treeMutationTrace'] in the constructor. tidy() checks
// it before constructing a TreeMutationTracer around the munger.
17 private $treeMutationTrace;
// Copied from $config['serializerTrace'] in the constructor. tidy() checks
// it before constructing a SerializerWithTracer.
19 private $serializerTrace;
// Value of the ParserEnableLegacyMediaDOM option. tidy() uses it to pick the
// tree builder class: TreeBuilder when truthy, RemexCompatBuilder otherwise.
25 private $enableLegacyMediaDOM;
/**
 * Option names the constructor expects to find in the ServiceOptions object
 * it is given; the constructor hands this list to assertRequiredOptions().
 */
28 public const CONSTRUCTOR_OPTIONS
= [
// Read in the constructor as the driver's $config array.
29 MainConfigNames
::TidyConfig
,
// Read in the constructor into $this->enableLegacyMediaDOM.
30 MainConfigNames
::ParserEnableLegacyMediaDOM
,
34 * @param ServiceOptions $options
36 public function __construct( ServiceOptions
$options ) {
// Check $options against the names listed in CONSTRUCTOR_OPTIONS.
37 $options->assertRequiredOptions( self
::CONSTRUCTOR_OPTIONS
);
// $config is the TidyConfig option; it is indexed as an array below and then
// passed on to the parent constructor.
38 $config = $options->get( MainConfigNames
::TidyConfig
);
// Keep the ParserEnableLegacyMediaDOM option; tidy() uses it to choose the
// tree builder class.
39 $this->enableLegacyMediaDOM
= $options->get( MainConfigNames
::ParserEnableLegacyMediaDOM
);
// NOTE(review): the next three key/value pairs look like default entries for
// $config, but the statement that encloses them is not visible in this
// excerpt — confirm against the full file.
41 'treeMutationTrace' => false,
42 'serializerTrace' => false,
43 'mungerTrace' => false,
// Copy the individual switches out of $config onto the instance.
46 $this->treeMutationTrace
= $config['treeMutationTrace'];
47 $this->serializerTrace
= $config['serializerTrace'];
// NOTE(review): no declaration of $mungerTrace or $pwrap, and no default for
// the 'pwrap' key, is visible in this excerpt — confirm both exist in the
// full file (undeclared properties would be created dynamically).
48 $this->mungerTrace
= $config['mungerTrace'];
49 $this->pwrap
= $config['pwrap'];
// Pass the config array on to TidyDriverBase.
50 parent
::__construct( $config );
54 public function tidy( $text, ?callable
$textProcessor = null ) {
// Feeds $text through a RemexHtml pipeline and returns the serializer's
// result. The stages are chained by constructor argument: Tokenizer ->
// Dispatcher -> tree builder -> $tracer -> $munger -> $serializer -> formatter.
// $textProcessor is optional and is handed to the formatter unchanged.
// NOTE(review): several lines of this method (closing braces, else branches,
// one condition) are absent from this excerpt; inferred structure is marked.

// Closure passed to the tracing classes; forwards each message to wfDebug()
// with a "RemexHtml: " prefix.
55 $traceCallback = static function ( $msg ) {
56 wfDebug( "RemexHtml: $msg" );
// The formatter receives the caller's text processor as its 'textProcessor' option.
58 $formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
// With serializerTrace set, build the tracing serializer and give it the
// trace callback.
59 if ( $this->serializerTrace
) {
60 $serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
// Plain serializer — presumably the else branch of the check above (the
// `else` line is not visible here).
62 $serializer = new Serializer( $formatter );
// $munger is either a RemexCompatMunger wrapping the serializer or the
// serializer itself. NOTE(review): the condition choosing between these two
// assignments is not visible in this excerpt.
65 $munger = new RemexCompatMunger( $serializer, $this->mungerTrace
);
67 $munger = $serializer;
// With treeMutationTrace set, put a TreeMutationTracer in front of $munger.
// NOTE(review): the assignment of $tracer for the untraced case is not
// visible here, yet $tracer is used unconditionally below — confirm.
69 if ( $this->treeMutationTrace
) {
70 $tracer = new TreeMutationTracer( $munger, $traceCallback );
// enableLegacyMediaDOM selects the stock TreeBuilder; otherwise
// RemexCompatBuilder is instantiated.
74 $treeBuilderClass = $this->enableLegacyMediaDOM ? TreeBuilder
::class : RemexCompatBuilder
::class;
// The builder writes to $tracer and is created with 'ignoreErrors' and
// 'ignoreNulls' enabled.
75 $treeBuilder = new $treeBuilderClass( $tracer, [
76 'ignoreErrors' => true,
77 'ignoreNulls' => true,
79 $dispatcher = new Dispatcher( $treeBuilder );
// The tokenizer reads $text and emits to the dispatcher; all four options
// shown here are switched on.
80 $tokenizer = new Tokenizer( $dispatcher, $text, [
81 'ignoreErrors' => true,
82 'ignoreCharRefs' => true,
83 'ignoreNulls' => true,
84 'skipPreprocess' => true,
// Run the tokenizer with fragment options naming a 'body' element in the
// HTML namespace.
87 $tokenizer->execute( [
88 'fragmentNamespace' => HTMLData
::NS_HTML
,
89 'fragmentName' => 'body'
// Return whatever the serializer has produced.
91 return $serializer->getResult();