Merge "jquery.tablesorter: Silence an expected "sort-rowspan-error" warning"
[mediawiki.git] / resources / src / mediawiki.jqueryMsg / mediawiki.jqueryMsg.js
blob8c297e48bc69886211eac69998864af81760e206
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 /**
9 * @typedef {string|string[]} module:mediawiki.jqueryMsg~Replacements
10 * @ignore
12 /**
13 * @callback {Function} module:mediawiki.jqueryMsg~MessageFormatterFunction
14 * @param {Array<module:mediawiki.jqueryMsg~Replacements>} replacements Optional variable replacements (variadically or an array).
15 * This is a mixed array of strings or arrays of string. This is equivalent to Array<string|string[]> but cannot be documented until the
16 * jsdoc theme has been patched (T354716).
17 * @return {jQuery} Rendered HTML.
18 * @ignore
21 /**
22 * @callback {Function} MessageFormatterFunctionGenerator
23 * @return {module:mediawiki.jqueryMsg~MessageFormatterFunction}
24 * @ignore
// Native slice, used to copy the tail of array-like argument lists.
const slice = Array.prototype.slice;
const util = require( 'mediawiki.util' );
const mwString = require( 'mediawiki.String' );

// Baseline settings that every Parser instance starts from.
const parserDefaults = {
	// Magic words mapped to their expansions. More are supplied by server-side data.
	magic: {
		PAGENAME: mw.config.get( 'wgPageName' ),
		PAGENAMEE: util.wikiUrlencode( mw.config.get( 'wgPageName' ) ),
		SERVERNAME: mw.config.get( 'wgServerName' ),
		CONTENTLANGUAGE: mw.config.get( 'wgContentLanguage' )
	},

	// HTML elements that may appear in wikitext. Self-closing tags are not
	// currently supported. Populated from server-side data.
	allowedHtmlElements: [],

	// Attributes accepted on any allowed element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Extra attributes accepted on specific elements only.
	// Keys are lower-case element names, values are arrays of attribute names.
	allowedHtmlAttributesByElement: {},

	messages: mw.messages,
	language: mw.language,

	// Same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the
	// actual escaping for 'escaped' is done by other code (generally
	// through mediawiki.js).
	//
	// However, note that this default only
	// applies to direct calls to jqueryMsg. The default for mediawiki.js itself
	// is 'text', including when it uses jqueryMsg.
	format: 'parse'
};
// Add in server-side data (allowedHtmlElements and magic words).
// The leading `true` asks $.extend for a recursive (deep) merge into parserDefaults.
$.extend( true, parserDefaults, require( './parserDefaults.json' ) );
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own HtmlEmitter jQuery object is given (one carrying the 'mediaWiki_htmlEmitter'
 * class), its contents are appended in its place.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The caller's `children` array is never modified; a converted copy is appended instead.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	const items = Array.isArray( children ) ? children : [ children ];

	// Array.from (rather than map) also visits holes in sparse arrays, like an index loop.
	const nodes = Array.from( items, ( child ) => {
		if ( typeof child !== 'object' ) {
			// Strings, numbers etc. become text nodes so they are never parsed as HTML.
			return document.createTextNode( child );
		}
		if ( child instanceof $ && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap our own emitter wrapper: only its contents are appended.
			return child.contents();
		}
		return child;
	} );

	return $parent.append( nodes );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	// Applied in order. '&amp;' is last so that '&amp;lt;' decodes to '&lt;', not '<'.
	const substitutions = [
		[ /&#039;/g, '\'' ],
		[ /&quot;/g, '"' ],
		[ /&lt;/g, '<' ],
		[ /&gt;/g, '>' ],
		[ /&amp;/g, '&' ]
	];
	let decoded = encoded;
	for ( const [ entity, character ] of substitutions ) {
		decoded = decoded.replace( entity, character );
	}
	return decoded;
}
/**
 * Turn input into a string.
 *
 * A jQuery object contributes its .text(); anything else goes through String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	const value = input instanceof $ ? input.text() : input;
	return String( value );
}
/**
 * Given parser options, return a function that parses a key and replacements, returning jQuery object
 *
 * The returned function tries to parse a key and optional replacements, returning a jQuery
 * object that may be a tree of jQuery nodes. If parsing throws, the error is logged and
 * tracked, and the raw message for the key is returned as the text of a <span>. This keeps
 * a broken message from halting script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	return function ( args ) {
		const parser = new Parser( options );
		const key = args[ 0 ];

		// Replacements arrive either as one array or as the remaining arguments.
		let replacements;
		if ( Array.isArray( args[ 1 ] ) ) {
			replacements = args[ 1 ];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, replacements );
		} catch ( e ) {
			const fallback = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + e.message );
			mw.track( 'mediawiki.jqueryMsg.error', {
				messageKey: key,
				errorMessage: e.message
			} );
			return $( '<span>' ).text( fallback );
		}
	};
}
/**
 * Initialize parser defaults.
 *
 * Copies the own enumerable properties of `data` onto the shared parserDefaults object
 * (a shallow merge). This is currently used by the QUnit testrunner to change the
 * reference in parserDefaults.messages to the test messages and back.
 *
 * @private
 * @param {Object} data New data to extend parser defaults with
 */
const setParserDefaults = ( data ) => {
	Object.assign( parserDefaults, data );
};
/**
 * Get current parser defaults.
 *
 * Primarily used for the unit test. Returns a shallow copy, so nested objects are
 * still shared with the live defaults.
 *
 * @private
 * @return {Object}
 */
const getParserDefaults = () => Object.assign( {}, parserDefaults );
/**
 * Returns a function suitable for static use, to construct strings from a message key (and optional replacements).
 *
 * The output format is fixed when this is called: options.format if defined, otherwise
 * parserDefaults.format. The underlying parser function is created on first use.
 *
 * @ignore
 * @param {Object} options parser options
 * @return {module:mediawiki.jqueryMsg~MessageFormatterFunction}
 */
const defaultMessageFunction = function ( options ) {
	const hasExplicitFormat = Boolean( options ) && options.format !== undefined;
	const format = hasExplicitFormat ? options.format : parserDefaults.format;
	let failableParserFn;

	return function ( ...params ) {
		if ( !failableParserFn ) {
			failableParserFn = getFailableParserFn( options );
		}
		const $result = failableParserFn( params );
		// 'text' and 'escaped' yield the plain text; every other format yields HTML.
		return ( format === 'text' || format === 'escaped' ) ?
			$result.text() :
			$result.html();
	};
};
/**
 * Generator used to build message formatter functions. Starts out as
 * defaultMessageFunction and can be swapped through setMessageFunction.
 *
 * @type {MessageFormatterFunctionGenerator}
 * @ignore
 */
let messageFunction = defaultMessageFunction;
/**
 * Build a message formatter using whichever generator is currently installed
 * in messageFunction.
 *
 * @ignore
 * @param {Object} options parser options
 * @return {module:mediawiki.jqueryMsg~MessageFormatterFunction}
 */
const getMessageFunction = ( options ) => messageFunction( options );
/**
 * Allows tests to override the message function.
 *
 * @ignore
 * @param {MessageFormatterFunctionGenerator} msgFunction
 * @return {Function} that allows you to restore the original message function.
 */
const setMessageFunction = function ( msgFunction ) {
	messageFunction = msgFunction;

	// Restoring always reinstalls defaultMessageFunction, not the previous override.
	const restore = () => {
		messageFunction = defaultMessageFunction;
	};
	return restore;
};
/**
 * Returns a jQuery plugin.
 *
 * The plugin renders the message named by its arguments, then replaces the contents
 * of the jQuery object it is invoked on (`this`) with the rendered nodes. The parser
 * function is created on first use.
 *
 * @ignore
 * @param {Object} [options] Parser options
 * @return {module:mediawiki.jqueryMsg~MessageFormatterFunction}
 */
const getPlugin = function ( options ) {
	let failableParserFn;

	// Must stay a regular function: `this` is the jQuery object the plugin is called on.
	return function ( ...params ) {
		failableParserFn = failableParserFn || getFailableParserFn( options );
		const $rendered = failableParserFn( params );
		return this.empty().append( $rendered.contents() );
	};
};
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * Settings are parserDefaults overlaid with `options`. For the 'text' and 'escaped'
 * formats only curly-brace transformation is enabled.
 *
 * @class
 * @private
 * @param {Object} options
 */
function Parser( options ) {
	const settings = Object.assign( {}, parserDefaults, options );
	settings.onlyCurlyBraceTransform = [ 'text', 'escaped' ].includes( settings.format );

	this.settings = settings;
	// Parsed ASTs, keyed by message key.
	this.astCache = {};
	this.emitter = new HtmlEmitter( settings.language, settings.magic );
}
291 Parser.prototype = {
293 * Where the magic happens.
294 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
295 * If an error is thrown, returns original key, and logs the error
297 * @param {string} key Message key.
298 * @param {Array} replacements Variable replacements for $1, $2... $n
299 * @return {jQuery}
301 parse: function ( key, replacements ) {
302 const ast = this.getAst( key, replacements );
303 return this.emitter.emit( ast, replacements );
307 * Fetch the message string associated with a key, return parsed structure. Memoized.
308 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
310 * @param {string} key
311 * @param {Array} replacements Variable replacements for $1, $2... $n
312 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
314 getAst: function ( key, replacements ) {
315 if ( !Object.prototype.hasOwnProperty.call( this.astCache, key ) ) {
316 let wikiText = this.settings.messages.get( key );
317 // Keep this synchronised with Message#parser in mediawiki.base.js
318 if (
319 mw.config.get( 'wgUserLanguage' ) === 'qqx' &&
320 ( !wikiText || wikiText === '(' + key + ')' )
322 wikiText = '(' + key + '$*)';
323 } else if ( typeof wikiText !== 'string' ) {
324 wikiText = '⧼' + key + '⧽';
326 wikiText = mw.internalDoTransformFormatForQqx( wikiText, replacements );
327 this.astCache[ key ] = this.wikiTextToAst( wikiText );
329 return this.astCache[ key ];
333 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
335 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
336 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
338 * @param {string} input Message string wikitext
339 * @throws Error
340 * @return {any} abstract syntax tree
342 wikiTextToAst: function ( input ) {
343 let nonWhitespaceExpression = null, expression = null, templateContents = null, paramExpression = null, colon = null;
344 const settings = this.settings;
346 // Indicates current position in input as we parse through it.
347 // Shared among all parsing functions below.
348 let pos = 0;
350 // =========================================================
351 // parsing combinators - could be a library on its own
352 // =========================================================
/**
 * Try parsers until one works, if none work return null
 *
 * Parsers are tried in order; the first result that is not null wins and the
 * remaining parsers are not run.
 *
 * @private
 * @param {Function[]} ps
 * @return {Function} that will return {string|null}
 */
function choice( ps ) {
	return function () {
		for ( const parser of ps ) {
			const outcome = parser();
			if ( outcome !== null ) {
				return outcome;
			}
		}
		return null;
	};
}
/**
 * Try several ps in a row, all must succeed or return null.
 * This is the only eager one: it runs immediately instead of returning a parser.
 * On failure the shared position is rewound to where the sequence started.
 *
 * @private
 * @param {Function[]} ps Each function should return a string or null
 * @return {string[]|null}
 */
function sequence( ps ) {
	const rewindTo = pos;
	const collected = [];
	for ( const parser of ps ) {
		const piece = parser();
		if ( piece === null ) {
			pos = rewindTo;
			return null;
		}
		collected.push( piece );
	}
	return collected;
}
/**
 * Run the same parser over and over until it fails.
 * Must succeed a minimum of n times or return null; in that case the shared
 * position is rewound to where the run started.
 *
 * @private
 * @param {number} n
 * @param {Function} p Should return a string or null
 * @return {Function} that will return {string[]|null}
 */
function nOrMore( n, p ) {
	return function () {
		const rewindTo = pos;
		const matches = [];
		for ( let match = p(); match !== null; match = p() ) {
			matches.push( match );
		}
		if ( matches.length >= n ) {
			return matches;
		}
		pos = rewindTo;
		return null;
	};
}
/**
 * Just make parsers out of simpler JS builtin types
 *
 * The returned parser succeeds when the input at the current position starts
 * with `s`, consuming it; otherwise it returns null and leaves the position alone.
 *
 * @private
 * @param {string} s
 * @return {Function} that will return {string|null}
 */
function makeStringParser( s ) {
	const len = s.length;
	return function () {
		if ( input.slice( pos, pos + len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
/**
 * Makes a regex parser, given a RegExp object.
 * The regex being passed in should start with a ^ to anchor it to the start
 * of the string, because it is matched against the unconsumed remainder of the input.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 */
function makeRegexParser( regex ) {
	return function () {
		const remaining = input.slice( pos );
		const found = remaining.match( regex );
		if ( found === null ) {
			return null;
		}
		const token = found[ 0 ];
		pos += token.length;
		return token;
	};
}
461 // ===================================================================
462 // General patterns above this line -- wikitext specific parsers below
463 // ===================================================================
465 // Parsing functions follow. All parsing functions work like this:
466 // They don't accept any arguments.
467 // Instead, they just operate non destructively on the string 'input'
468 // As they can consume parts of the string, they advance the shared variable pos,
469 // and return tokens (or whatever else they want to return).
470 // some things are defined as closures and other things as ordinary functions
471 // converting everything to a closure makes it a lot harder to debug... errors pop up
472 // but some debuggers can't tell you exactly where they come from. Also the mutually
473 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
474 // This may be because, to save code, memoization was removed
476 /* eslint-disable no-useless-escape */
477 const regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
478 const regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
479 const regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
480 /* eslint-enable no-useless-escape */
482 const backslash = makeStringParser( '\\' );
483 const anyCharacter = makeRegexParser( /^./ );
// A backslash followed by any single character; yields that character without the backslash.
function escapedLiteral() {
	const pair = sequence( [
		backslash,
		anyCharacter
	] );
	if ( pair === null ) {
		return null;
	}
	return pair[ 1 ];
}
491 const escapedOrLiteralWithoutSpace = choice( [
492 escapedLiteral,
493 regularLiteralWithoutSpace
494 ] );
495 const escapedOrLiteralWithoutBar = choice( [
496 escapedLiteral,
497 regularLiteralWithoutBar
498 ] );
499 const escapedOrRegularLiteral = choice( [
500 escapedLiteral,
501 regularLiteral
502 ] );
// Used to define "literals" without spaces, in space-delimited situations.
// Joins one or more matched characters into a single string.
function literalWithoutSpace() {
	const pieces = nOrMore( 1, escapedOrLiteralWithoutSpace )();
	if ( pieces === null ) {
		return null;
	}
	return pieces.join( '' );
}
// Used to define "literals" within template parameters. The pipe character is the
// parameter delimiter, so by default it is not a literal in the parameter.
function literalWithoutBar() {
	const pieces = nOrMore( 1, escapedOrLiteralWithoutBar )();
	if ( pieces === null ) {
		return null;
	}
	return pieces.join( '' );
}
// One or more escaped or regular literal characters, joined into a single string.
function literal() {
	const pieces = nOrMore( 1, escapedOrRegularLiteral )();
	if ( pieces === null ) {
		return null;
	}
	return pieces.join( '' );
}
520 const asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
522 const whitespace = makeRegexParser( /^\s+/ );
524 const dollar = makeStringParser( '$' );
525 const digits = makeRegexParser( /^\d+/ );
// A "$n" placeholder. Yields [ 'REPLACE', index ] where index is zero-based,
// i.e. "$1" refers to index 0.
function replacement() {
	const parts = sequence( [
		dollar,
		digits
	] );
	if ( parts === null ) {
		return null;
	}
	const oneBased = parseInt( parts[ 1 ], 10 );
	return [ 'REPLACE', oneBased - 1 ];
}
536 const openExtlink = makeStringParser( '[' );
537 const closeExtlink = makeStringParser( ']' );
// This extlink MUST have inner contents after the target, separated by whitespace:
// [foo] is not allowed; [foo bar], [foo <i>bar</i>], etc. are allowed.
function extlink() {
	const parts = sequence( [
		openExtlink,
		nOrMore( 1, nonWhitespaceExpression ),
		whitespace,
		nOrMore( 1, expression ),
		closeExtlink
	] );
	if ( parts === null ) {
		return null;
	}
	const targetNodes = parts[ 1 ];
	const labelNodes = parts[ 3 ];

	// When the entire link target is a single parameter, we can't use CONCAT, as we allow
	// passing fancy parameters (like a whole jQuery object or a function) to use for the
	// link. Check only if it's a single match, since we can either do CONCAT or not for
	// singles with the same effect.
	let target;
	if ( targetNodes.length === 1 ) {
		target = targetNodes[ 0 ];
	} else {
		target = [ 'CONCAT', ...targetNodes ];
	}

	return [
		'EXTLINK',
		target,
		[ 'CONCAT', ...labelNodes ]
	];
}
563 const pipe = makeStringParser( '|' );
565 const openTemplate = makeStringParser( '{{' );
566 const closeTemplate = makeStringParser( '}}' );
// "{{" templateContents "}}"; yields whatever templateContents produced.
function template() {
	const parts = sequence( [
		openTemplate,
		templateContents,
		closeTemplate
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
// Matches a template name at the current position; yields it as a string, or null.
function templateName() {
	// see $wgLegalTitleChars
	// not allowing : due to the need to catch "PLURAL:$1"
	const parseName = makeRegexParser( /^#?[ !"$&'()*,./0-9;=?@A-Z^_`a-z~\x80-\xFF+-]+/ );
	const name = parseName();
	if ( name === null ) {
		return null;
	}
	return name.toString();
}
// "|" followed by zero or more parameter expressions.
function templateParam() {
	const parts = sequence( [
		pipe,
		nOrMore( 0, paramExpression )
	] );
	if ( parts === null ) {
		return null;
	}
	const nodes = parts[ 1 ];
	// Several nodes are wrapped in CONCAT; otherwise the first node is returned raw
	// (which is undefined when the parameter is empty).
	if ( nodes.length > 1 ) {
		return [ 'CONCAT', ...nodes ];
	}
	return nodes[ 0 ];
}
// templateName ":" followed by zero or more parameter expressions, e.g. "PLURAL:$1".
// Yields [ name, firstParameter ].
function templateNameWithParam() {
	const parts = sequence( [
		templateName,
		colon,
		nOrMore( 0, paramExpression )
	] );
	if ( parts === null ) {
		return null;
	}
	const name = parts[ 0 ];
	const nodes = parts[ 2 ];
	// Several nodes are wrapped in CONCAT; otherwise the first node is used raw.
	const firstParam = nodes.length > 1 ? [ 'CONCAT', ...nodes ] : nodes[ 0 ];
	return [ name, firstParam ];
}
610 colon = makeStringParser( ':' );
611 templateContents = choice( [
612 function () {
613 const result = sequence( [
614 templateNameWithParam,
615 nOrMore( 0, templateParam )
616 ] );
617 return result === null ? null : [ ...result[ 0 ], ...result[ 1 ] ];
619 function () {
620 const result = sequence( [
621 templateName,
622 nOrMore( 0, templateParam )
623 ] );
624 if ( result === null ) {
625 return null;
627 return [ result[ 0 ], ...result[ 1 ] ];
629 ] );
// Link contents of the form "target|label". Yields [ targetConcat, labelConcat ].
function pipedWikilink() {
	const parts = sequence( [
		nOrMore( 1, paramExpression ),
		pipe,
		nOrMore( 1, expression )
	] );
	if ( parts === null ) {
		return null;
	}
	const targetNodes = parts[ 0 ];
	const labelNodes = parts[ 2 ];
	return [
		[ 'CONCAT', ...targetNodes ],
		[ 'CONCAT', ...labelNodes ]
	];
}
// Link contents with no pipe: only a target. Yields [ targetConcat ].
function unpipedWikilink() {
	const parts = sequence( [
		nOrMore( 1, paramExpression )
	] );
	if ( parts === null ) {
		return null;
	}
	const targetNodes = parts[ 0 ];
	return [
		[ 'CONCAT', ...targetNodes ]
	];
}
652 const wikilinkContents = choice( [
653 pipedWikilink,
654 unpipedWikilink
655 ] );
657 const openWikilink = makeStringParser( '[[' );
658 const closeWikilink = makeStringParser( ']]' );
// "[[" wikilinkContents "]]". Yields [ 'WIKILINK', target ] or [ 'WIKILINK', target, label ].
function wikilink() {
	const parts = sequence( [
		openWikilink,
		wikilinkContents,
		closeWikilink
	] );
	if ( parts === null ) {
		return null;
	}
	return [ 'WIKILINK', ...parts[ 1 ] ];
}
668 // TODO: Support data- if appropriate
669 const doubleQuote = makeStringParser( '"' );
670 const htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
// A double-quoted attribute value; yields the text between the quotes.
function doubleQuotedHtmlAttributeValue() {
	const parts = sequence( [
		doubleQuote,
		htmlDoubleQuoteAttributeValue,
		doubleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
680 const singleQuote = makeStringParser( '\'' );
681 const htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
// A single-quoted attribute value; yields the text between the quotes.
function singleQuotedHtmlAttributeValue() {
	const parts = sequence( [
		singleQuote,
		htmlSingleQuoteAttributeValue,
		singleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
691 const htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
// Whitespace, an ASCII-letter attribute name, "=", then a quoted value.
// Yields [ name, value ].
function htmlAttribute() {
	const quotedValue = choice( [
		doubleQuotedHtmlAttributeValue,
		singleQuotedHtmlAttributeValue
	] );
	const parts = sequence( [
		whitespace,
		asciiAlphabetLiteral,
		htmlAttributeEquals,
		quotedValue
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 1 ], parts[ 3 ] ];
}
/**
 * Checks if HTML is allowed
 *
 * The element is allowed when the start and end tag names match (case-insensitively),
 * the tag is listed in settings.allowedHtmlElements, every attribute name is allowed
 * either for all elements or for this element, and no style attribute matches the
 * dangerous-CSS pattern below.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {string[]} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 * @ignore
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();
	if ( startTagName !== endTagName || !settings.allowedHtmlElements.includes( startTagName ) ) {
		return false;
	}

	const badStyle = /[\000-\010\013\016-\037\177]|expression|filter\s*:|accelerator\s*:|-o-link\s*:|-o-link-source\s*:|-o-replace\s*:|url\s*\(|image\s*\(|image-set\s*\(/i;

	// Only consult own properties of the per-element map, so a tag name such as
	// "constructor" cannot resolve to an inherited Object.prototype member.
	const byElement = settings.allowedHtmlAttributesByElement;
	const elementAttributes = Object.prototype.hasOwnProperty.call( byElement, startTagName ) ?
		( byElement[ startTagName ] || [] ) :
		[];

	for ( let i = 0, len = attributes.length; i < len; i += 2 ) {
		const attributeName = attributes[ i ];
		const attributeValue = attributes[ i + 1 ];
		if ( !settings.allowedHtmlCommonAttributes.includes( attributeName ) &&
			!elementAttributes.includes( attributeName ) ) {
			return false;
		}
		if ( attributeName === 'style' && badStyle.test( attributeValue ) ) {
			mw.log( 'HTML tag not parsed due to dangerous style attribute' );
			return false;
		}
	}

	return true;
}
// Zero or more attributes, flattened into
// [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
function htmlAttributes() {
	const pairs = nOrMore( 0, htmlAttribute )();
	// Un-nest attributes array due to structure of jQueryMsg operations (see emit).
	const flattened = [ 'HTMLATTRIBUTES' ];
	for ( const [ name, value ] of pairs ) {
		flattened.push( name, value );
	}
	return flattened;
}
746 const openHtmlStartTag = makeStringParser( '<' );
747 const optionalForwardSlash = makeRegexParser( /^\/?/ );
748 const openHtmlEndTag = makeStringParser( '</' );
749 const closeHtmlTag = makeRegexParser( /^\s*>/ );
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
function html() {
	// Parsed in three stages: start tag, contents, end tag. The input offsets of both
	// tags are recorded so that their original text can be re-emitted verbatim when
	// the markup has to be treated as plain text.
	const openTagStart = pos;
	const openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	const openTagEnd = pos;
	const [ , startTagName, wrappedAttributes ] = openTag;

	const contents = nOrMore( 0, expression )();

	const closeTagStart = pos;
	const closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	const rawOpenTag = input.slice( openTagStart, openTagEnd );

	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag and contents.
		return [ 'CONCAT', rawOpenTag, ...contents ];
	}

	const closeTagEnd = pos;
	const endTagName = closeTag[ 1 ];
	// wrappedAttributes is [ 'HTMLATTRIBUTES', name, value, ... ]; drop the marker.
	const attributes = wrappedAttributes.slice( 1 );
	if ( isAllowedHtml( startTagName, endTagName, attributes ) ) {
		return [ 'HTMLELEMENT', startTagName, wrappedAttributes, ...contents ];
	}

	// HTML is not allowed, so contents will remain how it was, while HTML markup at
	// this level will be treated as text.
	// E.g. assuming script tags are not allowed:
	//
	// <script>[[Foo|bar]]</script>
	//
	// results in '&lt;script&gt;' and '&lt;/script&gt;' (not treated as an HTML tag),
	// surrounding a fully parsed HTML link.
	//
	// Concatenate everything from the tag, flattening the contents.
	const rawCloseTag = input.slice( closeTagStart, closeTagEnd );
	return [ 'CONCAT', rawOpenTag, ...contents, rawCloseTag ];
}
// <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed,
// as a single text node inside a CONCAT.
function nowiki() {
	const parsedResult = sequence( [
		makeStringParser( '<nowiki>' ),
		// We use a greedy non-backtracking parser, so we must ensure here that we don't take too much
		makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
		makeStringParser( '</nowiki>' )
	] );
	if ( parsedResult === null ) {
		return null;
	}
	// parsedResult[ 1 ] is a string. It must not be spread, which would split it into
	// one AST node per character.
	return [ 'CONCAT', parsedResult[ 1 ] ];
}
825 nonWhitespaceExpression = choice( [
826 template,
827 wikilink,
828 extlink,
829 replacement,
830 literalWithoutSpace
831 ] );
832 paramExpression = choice( [
833 template,
834 wikilink,
835 extlink,
836 replacement,
837 literalWithoutBar
838 ] );
840 expression = choice( [
841 template,
842 wikilink,
843 extlink,
844 replacement,
845 nowiki,
846 html,
847 literal
848 ] );
850 const regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
// Literal text for curly-brace-only parsing: one or more matched characters
// (square brackets included), joined into a single string.
function curlyBraceTransformExpressionLiteral() {
	const pieces = nOrMore( 1, regularLiteralWithSquareBrackets )();
	if ( pieces === null ) {
		return null;
	}
	return pieces.join( '' );
}
855 // Used when only {{-transformation is wanted, for 'text'
856 // or 'escaped' formats
857 const curlyBraceTransformExpression = choice( [
858 template,
859 replacement,
860 curlyBraceTransformExpressionLiteral
861 ] );
/**
 * Starts the parse
 *
 * Applies the root expression zero or more times and wraps the resulting
 * nodes in a CONCAT node.
 *
 * @param {Function} rootExpression Root parse function
 * @return {Array|null}
 * @ignore
 */
function start( rootExpression ) {
	const nodes = nOrMore( 0, rootExpression )();
	return nodes === null ? null : [ 'CONCAT', ...nodes ];
}
877 // everything above this point is supposed to be stateless/static, but
878 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
879 // finally let's do some actual work...
881 const res = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
884 * For success, the p must have gotten to the end of the input
885 * and returned a non-null.
886 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
888 if ( res === null || pos !== input.length ) {
889 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
891 return res;
/**
 * Class that primarily exists to emit HTML from parser ASTs.
 *
 * Each magic word becomes an instance method, named by the lower-cased word,
 * that returns the word's expansion.
 *
 * @private
 * @class
 * @param {mw.language} language
 * @param {Object.<string,string>} [magic]
 */
function HtmlEmitter( language, magic ) {
	this.language = language;

	const magicWords = magic || {};
	for ( const word in magicWords ) {
		const expansion = magicWords[ word ];
		this[ word.toLowerCase() ] = function () {
			return expansion;
		};
	}

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 *
	 * @param {any} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {any} single-string node or array of nodes suitable for jQuery appending
	 */
	this.emit = ( node, replacements ) => {
		const nodeType = typeof node;

		if ( nodeType === 'string' || nodeType === 'number' ) {
			return node;
		}

		if ( nodeType === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in a
			// template) results in undefined; it is emitted as the empty string.
			return '';
		}

		if ( nodeType !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + nodeType );
		}

		// typeof reports 'object' for arrays: node is [ operation, ...subnodes ].
		// Subnodes are emitted first, then handed to the method named after the operation.
		// eslint-disable-next-line no-jquery/no-map-util
		const subnodes = $.map( node.slice( 1 ), ( child ) => this.emit( child, replacements ) );
		const operation = node[ 0 ].toLowerCase();
		if ( typeof this[ operation ] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return this[ operation ]( subnodes, replacements );
	};
}
952 // BIDI utility function, copied from jquery.i18n.emitter.bidi.js
954 // Matches the first strong directionality codepoint:
955 // - in group 1 if it is LTR
956 // - in group 2 if it is RTL
957 // Does not match if there is no strong directionality codepoint.
959 // Generated by UnicodeJS (see tools/strongDir) from the UCD; see
960 // https://gerrit.wikimedia.org/g/unicodejs .
961 // eslint-disable-next-line no-misleading-character-class
962 const strongDirRegExp = new RegExp(
963 '(?:' +
964 '(' +
965 '[\u0041-\u005a\u0061-\u007a\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02b8\u02bb-\u02c1\u02d0\u02d1\u02e0-\u02e4\u02ee\u0370-\u0373\u0376\u0377\u037a-\u037d\u037f\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0482\u048a-\u052f\u0531-\u0556\u0559-\u055f\u0561-\u0587\u0589\u0903-\u0939\u093b\u093d-\u0940\u0949-\u094c\u094e-\u0950\u0958-\u0961\u0964-\u0980\u0982\u0983\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd-\u09c0\u09c7\u09c8\u09cb\u09cc\u09ce\u09d7\u09dc\u09dd\u09df-\u09e1\u09e6-\u09f1\u09f4-\u09fa\u0a03\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a3e-\u0a40\u0a59-\u0a5c\u0a5e\u0a66-\u0a6f\u0a72-\u0a74\u0a83\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd-\u0ac0\u0ac9\u0acb\u0acc\u0ad0\u0ae0\u0ae1\u0ae6-\u0af0\u0af9\u0b02\u0b03\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b3e\u0b40\u0b47\u0b48\u0b4b\u0b4c\u0b57\u0b5c\u0b5d\u0b5f-\u0b61\u0b66-\u0b77\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bbe\u0bbf\u0bc1\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcc\u0bd0\u0bd7\u0be6-\u0bf2\u0c01-\u0c03\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c39\u0c3d\u0c41-\u0c44\u0c58-\u0c5a\u0c60\u0c61\u0c66-\u0c6f\u0c7f\u0c82\u0c83\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd-\u0cc4\u0cc6-\u0cc8\u0cca\u0ccb\u0cd5\u0cd6\u0cde\u0ce0\u0ce1\u0ce6-\u0cef\u0cf1\u0cf2\u0d02\u0d03\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d-\u0d40\u0d46-\u0d48\u0d4a-\u0d4c\u0d4e\u0d57\u0d5f-\u0d61\u0d66-\u0d75\u0d79-\u0d7f\u0d82\u0d83\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0dcf-\u0dd1\u0dd8-\u0ddf\u0de6-\u0def\u0df2-\u0df4\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e4f-\u0e5b\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0e
d0-\u0ed9\u0edc-\u0edf\u0f00-\u0f17\u0f1a-\u0f34\u0f36\u0f38\u0f3e-\u0f47\u0f49-\u0f6c\u0f7f\u0f85\u0f88-\u0f8c\u0fbe-\u0fc5\u0fc7-\u0fcc\u0fce-\u0fda\u1000-\u102c\u1031\u1038\u103b\u103c\u103f-\u1057\u105a-\u105d\u1061-\u1070\u1075-\u1081\u1083\u1084\u1087-\u108c\u108e-\u109c\u109e-\u10c5\u10c7\u10cd\u10d0-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1360-\u137c\u1380-\u138f\u13a0-\u13f5\u13f8-\u13fd\u1401-\u167f\u1681-\u169a\u16a0-\u16f8\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1735\u1736\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17b6\u17be-\u17c5\u17c7\u17c8\u17d4-\u17da\u17dc\u17e0-\u17e9\u1810-\u1819\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191e\u1923-\u1926\u1929-\u192b\u1930\u1931\u1933-\u1938\u1946-\u196d\u1970-\u1974\u1980-\u19ab\u19b0-\u19c9\u19d0-\u19da\u1a00-\u1a16\u1a19\u1a1a\u1a1e-\u1a55\u1a57\u1a61\u1a63\u1a64\u1a6d-\u1a72\u1a80-\u1a89\u1a90-\u1a99\u1aa0-\u1aad\u1b04-\u1b33\u1b35\u1b3b\u1b3d-\u1b41\u1b43-\u1b4b\u1b50-\u1b6a\u1b74-\u1b7c\u1b82-\u1ba1\u1ba6\u1ba7\u1baa\u1bae-\u1be5\u1be7\u1bea-\u1bec\u1bee\u1bf2\u1bf3\u1bfc-\u1c2b\u1c34\u1c35\u1c3b-\u1c49\u1c4d-\u1c7f\u1cc0-\u1cc7\u1cd3\u1ce1\u1ce9-\u1cec\u1cee-\u1cf3\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u200e\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u214f\u2160-\u2188\u2336-\u237a\u2395\u249c-\u24e9\u26ac\u2800-\u28ff\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d70\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2
dd8-\u2dde\u3005-\u3007\u3021-\u3029\u302e\u302f\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u3190-\u31ba\u31f0-\u321c\u3220-\u324f\u3260-\u327b\u327f-\u32b0\u32c0-\u32cb\u32d0-\u32fe\u3300-\u3376\u337b-\u33dd\u33e0-\u33fe\u3400-\u4db5\u4e00-\u9fd5\ua000-\ua48c\ua4d0-\ua60c\ua610-\ua62b\ua640-\ua66e\ua680-\ua69d\ua6a0-\ua6ef\ua6f2-\ua6f7\ua722-\ua787\ua789-\ua7ad\ua7b0-\ua7b7\ua7f7-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua824\ua827\ua830-\ua837\ua840-\ua873\ua880-\ua8c3\ua8ce-\ua8d9\ua8f2-\ua8fd\ua900-\ua925\ua92e-\ua946\ua952\ua953\ua95f-\ua97c\ua983-\ua9b2\ua9b4\ua9b5\ua9ba\ua9bb\ua9bd-\ua9cd\ua9cf-\ua9d9\ua9de-\ua9e4\ua9e6-\ua9fe\uaa00-\uaa28\uaa2f\uaa30\uaa33\uaa34\uaa40-\uaa42\uaa44-\uaa4b\uaa4d\uaa50-\uaa59\uaa5c-\uaa7b\uaa7d-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaaeb\uaaee-\uaaf5\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uab30-\uab65\uab70-\uabe4\uabe6\uabe7\uabe9-\uabec\uabf0-\uabf9\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\ue000-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc]|\ud800[\udc00-\udc0b]|\ud800[\udc0d-\udc26]|\ud800[\udc28-\udc3a]|\ud800\udc3c|\ud800\udc3d|\ud800[\udc3f-\udc4d]|\ud800[\udc50-\udc5d]|\ud800[\udc80-\udcfa]|\ud800\udd00|\ud800\udd02|\ud800[\udd07-\udd33]|\ud800[\udd37-\udd3f]|\ud800[\uddd0-\uddfc]|\ud800[\ude80-\ude9c]|\ud800[\udea0-\uded0]|\ud800[\udf00-\udf23]|\ud800[\udf30-\udf4a]|\ud800[\udf50-\udf75]|\ud800[\udf80-\udf9d]|\ud800[\udf9f-\udfc3]|\ud800[\udfc8-\udfd5]|\ud801[\udc00-\udc9d]|\ud801[\udca0-\udca9]|\ud801[\udd00-\udd27]|\ud801[\udd30-\udd63]|\ud801\udd6f|\ud801[\ude00-\udf36]|\ud801[\udf40-\udf55]|\ud801[\udf60-\udf67]|\ud804\udc00|\ud804[\udc02-\udc37]|\ud804[\udc47-\udc4d]|\ud804[\udc66-\udc6f]|\ud804[\udc82-\udcb2]|\ud804\udcb7|\ud804\udcb8|\ud804[\udcbb-\udcc1]|\ud804[\udcd0-\udce8]|\ud804[\udcf0-\udcf9]|\ud804[\udd03-\udd26]|\
ud804\udd2c|\ud804[\udd36-\udd43]|\ud804[\udd50-\udd72]|\ud804[\udd74-\udd76]|\ud804[\udd82-\uddb5]|\ud804[\uddbf-\uddc9]|\ud804\uddcd|\ud804[\uddd0-\udddf]|\ud804[\udde1-\uddf4]|\ud804[\ude00-\ude11]|\ud804[\ude13-\ude2e]|\ud804\ude32|\ud804\ude33|\ud804\ude35|\ud804[\ude38-\ude3d]|\ud804[\ude80-\ude86]|\ud804\ude88|\ud804[\ude8a-\ude8d]|\ud804[\ude8f-\ude9d]|\ud804[\ude9f-\udea9]|\ud804[\udeb0-\udede]|\ud804[\udee0-\udee2]|\ud804[\udef0-\udef9]|\ud804\udf02|\ud804\udf03|\ud804[\udf05-\udf0c]|\ud804\udf0f|\ud804\udf10|\ud804[\udf13-\udf28]|\ud804[\udf2a-\udf30]|\ud804\udf32|\ud804\udf33|\ud804[\udf35-\udf39]|\ud804[\udf3d-\udf3f]|\ud804[\udf41-\udf44]|\ud804\udf47|\ud804\udf48|\ud804[\udf4b-\udf4d]|\ud804\udf50|\ud804\udf57|\ud804[\udf5d-\udf63]|\ud805[\udc80-\udcb2]|\ud805\udcb9|\ud805[\udcbb-\udcbe]|\ud805\udcc1|\ud805[\udcc4-\udcc7]|\ud805[\udcd0-\udcd9]|\ud805[\udd80-\uddb1]|\ud805[\uddb8-\uddbb]|\ud805\uddbe|\ud805[\uddc1-\udddb]|\ud805[\ude00-\ude32]|\ud805\ude3b|\ud805\ude3c|\ud805\ude3e|\ud805[\ude41-\ude44]|\ud805[\ude50-\ude59]|\ud805[\ude80-\udeaa]|\ud805\udeac|\ud805\udeae|\ud805\udeaf|\ud805\udeb6|\ud805[\udec0-\udec9]|\ud805[\udf00-\udf19]|\ud805\udf20|\ud805\udf21|\ud805\udf26|\ud805[\udf30-\udf3f]|\ud806[\udca0-\udcf2]|\ud806\udcff|\ud806[\udec0-\udef8]|\ud808[\udc00-\udf99]|\ud809[\udc00-\udc6e]|\ud809[\udc70-\udc74]|\ud809[\udc80-\udd43]|\ud80c[\udc00-\udfff]|\ud80d[\udc00-\udc2e]|\ud811[\udc00-\ude46]|\ud81a[\udc00-\ude38]|\ud81a[\ude40-\ude5e]|\ud81a[\ude60-\ude69]|\ud81a\ude6e|\ud81a\ude6f|\ud81a[\uded0-\udeed]|\ud81a\udef5|\ud81a[\udf00-\udf2f]|\ud81a[\udf37-\udf45]|\ud81a[\udf50-\udf59]|\ud81a[\udf5b-\udf61]|\ud81a[\udf63-\udf77]|\ud81a[\udf7d-\udf8f]|\ud81b[\udf00-\udf44]|\ud81b[\udf50-\udf7e]|\ud81b[\udf93-\udf9f]|\ud82c\udc00|\ud82c\udc01|\ud82f[\udc00-\udc6a]|\ud82f[\udc70-\udc7c]|\ud82f[\udc80-\udc88]|\ud82f[\udc90-\udc99]|\ud82f\udc9c|\ud82f\udc9f|\ud834[\udc00-\udcf5]|\ud834[\udd00-\udd26]|\ud834[\udd29-\udd66]|\ud834[\udd6a-\udd72]|\u
d834\udd83|\ud834\udd84|\ud834[\udd8c-\udda9]|\ud834[\uddae-\udde8]|\ud834[\udf60-\udf71]|\ud835[\udc00-\udc54]|\ud835[\udc56-\udc9c]|\ud835\udc9e|\ud835\udc9f|\ud835\udca2|\ud835\udca5|\ud835\udca6|\ud835[\udca9-\udcac]|\ud835[\udcae-\udcb9]|\ud835\udcbb|\ud835[\udcbd-\udcc3]|\ud835[\udcc5-\udd05]|\ud835[\udd07-\udd0a]|\ud835[\udd0d-\udd14]|\ud835[\udd16-\udd1c]|\ud835[\udd1e-\udd39]|\ud835[\udd3b-\udd3e]|\ud835[\udd40-\udd44]|\ud835\udd46|\ud835[\udd4a-\udd50]|\ud835[\udd52-\udea5]|\ud835[\udea8-\udeda]|\ud835[\udedc-\udf14]|\ud835[\udf16-\udf4e]|\ud835[\udf50-\udf88]|\ud835[\udf8a-\udfc2]|\ud835[\udfc4-\udfcb]|\ud836[\udc00-\uddff]|\ud836[\ude37-\ude3a]|\ud836[\ude6d-\ude74]|\ud836[\ude76-\ude83]|\ud836[\ude85-\ude8b]|\ud83c[\udd10-\udd2e]|\ud83c[\udd30-\udd69]|\ud83c[\udd70-\udd9a]|\ud83c[\udde6-\ude02]|\ud83c[\ude10-\ude3a]|\ud83c[\ude40-\ude48]|\ud83c\ude50|\ud83c\ude51|[\ud840-\ud868][\udc00-\udfff]|\ud869[\udc00-\uded6]|\ud869[\udf00-\udfff]|[\ud86a-\ud86c][\udc00-\udfff]|\ud86d[\udc00-\udf34]|\ud86d[\udf40-\udfff]|\ud86e[\udc00-\udc1d]|\ud86e[\udc20-\udfff]|[\ud86f-\ud872][\udc00-\udfff]|\ud873[\udc00-\udea1]|\ud87e[\udc00-\ude1d]|[\udb80-\udbbe][\udc00-\udfff]|\udbbf[\udc00-\udffd]|[\udbc0-\udbfe][\udc00-\udfff]|\udbff[\udc00-\udffd]' +
966 ')|(' +
967 '[\u0590\u05be\u05c0\u05c3\u05c6\u05c8-\u05ff\u07c0-\u07ea\u07f4\u07f5\u07fa-\u0815\u081a\u0824\u0828\u082e-\u0858\u085c-\u089f\u200f\ufb1d\ufb1f-\ufb28\ufb2a-\ufb4f\u0608\u060b\u060d\u061b-\u064a\u066d-\u066f\u0671-\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u0710\u0712-\u072f\u074b-\u07a5\u07b1-\u07bf\u08a0-\u08e2\ufb50-\ufd3d\ufd40-\ufdcf\ufdf0-\ufdfc\ufdfe\ufdff\ufe70-\ufefe]|\ud802[\udc00-\udd1e]|\ud802[\udd20-\ude00]|\ud802\ude04|\ud802[\ude07-\ude0b]|\ud802[\ude10-\ude37]|\ud802[\ude3b-\ude3e]|\ud802[\ude40-\udee4]|\ud802[\udee7-\udf38]|\ud802[\udf40-\udfff]|\ud803[\udc00-\ude5f]|\ud803[\ude7f-\udfff]|\ud83a[\udc00-\udccf]|\ud83a[\udcd7-\udfff]|\ud83b[\udc00-\uddff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\udf00-\udfff]|\ud83b[\ude00-\udeef]|\ud83b[\udef2-\udeff]' +
968 ')' +
/**
 * Gets the directionality of the first strongly directional codepoint in a string.
 *
 * This is the rule the BIDI algorithm uses to determine the directionality of
 * paragraphs ( http://unicode.org/reports/tr9/#The_Paragraph_Level ) and
 * FSI isolates ( http://unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
 *
 * TODO: Does not handle BIDI control characters inside the text.
 * TODO: Does not handle unallocated characters.
 *
 * @ignore
 * @param {string} text The text from which to extract initial directionality.
 * @return {string|null} 'ltr' or 'rtl', or null when strongDirRegExp does not match
 */
function strongDirFromContent( text ) {
	const match = text.match( strongDirRegExp );
	if ( !match ) {
		return null;
	}
	// An unset second capture group is reported as left-to-right,
	// a set one as right-to-left.
	return match[ 2 ] === undefined ? 'ltr' : 'rtl';
}
997 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
998 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
999 // If you have 'magic words' then configure the parser to have them upon creation.
1001 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
1002 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
1003 HtmlEmitter.prototype = {
1005 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1006 * Must return a single node to parents -- a jQuery with synthetic span
1007 * However, unwrap any other synthetic spans in our children and pass them upwards
1009 * @param {any[]} nodes Some single nodes, some arrays of nodes
1010 * @return {jQuery}
1012 concat: function ( nodes ) {
1013 const $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1014 // Use Array.from since mixed parameter.
1015 Array.from( nodes ).forEach( ( node ) => {
1016 // Let jQuery append nodes, arrays of nodes and jQuery objects
1017 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1018 appendWithoutParsing( $span, node );
1019 } );
1020 return $span;
1024 * Return escaped replacement of correct index, or string if unavailable.
1025 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1026 * if the specified parameter is not found return the same string
1027 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1029 * If the replacement at the index is an object, then a special property
1030 * is is added to it (if it does not exist already).
1031 * If the special property was already set, then we try to clone (instead of append)
1032 * the replacement object. This allows allow using a jQuery or HTMLElement object
1033 * multiple times within a single interface message.
1035 * TODO: Throw error if nodes.length > 1 ?
1037 * @param {Array} nodes List of one element, integer, n >= 0
1038 * @param {Array} replacements List of at least n strings
1039 * @return {string|jQuery} replacement
1041 replace: function ( nodes, replacements ) {
1042 const index = parseInt( nodes[ 0 ], 10 );
1044 if ( index < replacements.length ) {
1045 if ( typeof replacements[ index ] === 'object' ) {
1046 // Only actually clone on second use
1047 if ( !replacements[ index ].mwJQueryMsgHasAlreadyBeenUsedAsAReplacement ) {
1048 // Add our special property to the foreign object
1049 // in the least invasive way
1050 Object.defineProperty(
1051 replacements[ index ],
1052 'mwJQueryMsgHasAlreadyBeenUsedAsAReplacement',
1054 value: true,
1055 enumerable: false,
1056 writable: false
1059 return replacements[ index ];
1061 if ( typeof replacements[ index ].clone === 'function' ) {
1062 // if it is a jQuery object, use jQuery's clone method
1063 return replacements[ index ].clone( true );
1065 if ( typeof replacements[ index ].cloneNode === 'function' ) {
1066 // if it is a Node, then use the native cloning functionality
1067 return replacements[ index ].cloneNode( true );
1069 return replacements[ index ];
1071 return replacements[ index ];
1072 } else {
1073 // index not found, fallback to displaying variable
1074 return '$' + ( index + 1 );
1079 * Transform wiki-link
1081 * TODO:
1082 * It only handles basic cases, either no pipe, or a pipe with an explicit
1083 * anchor.
1085 * It does not attempt to handle features like the pipe trick.
1086 * However, the pipe trick should usually not be present in wikitext retrieved
1087 * from the server, since the replacement is done at save time.
1088 * It may, though, if the wikitext appears in extension-controlled content.
1090 * @param {string[]} nodes
1091 * @return {jQuery}
1093 wikilink: function ( nodes ) {
1094 let page = textify( nodes[ 0 ] );
1095 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1096 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1097 if ( page.charAt( 0 ) === ':' ) {
1098 page = page.slice( 1 );
1100 const title = new mw.Title( page );
1102 let anchor;
1103 if ( nodes.length === 1 ) {
1104 // [[Some Page]] or [[Namespace:Some Page]]
1105 anchor = page;
1106 } else {
1107 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1108 anchor = nodes[ 1 ];
1111 const $el = $( '<a>' ).attr( {
1112 title: title.getPrefixedText() || null,
1113 href: title.getUrl()
1114 } );
1115 return appendWithoutParsing( $el, anchor );
1119 * Converts array of HTML element key value pairs to object
1121 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1122 * name and 2 * n + 1 the associated value
1123 * @return {Object} Object mapping attribute name to attribute value
1125 htmlattributes: function ( nodes ) {
1126 const mapping = {};
1127 for ( let i = 0, len = nodes.length; i < len; i += 2 ) {
1128 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1130 return mapping;
1134 * Handles an (already-validated) HTML element.
1136 * @param {Array} nodes Nodes to process when creating element
1137 * @return {jQuery}
1139 htmlelement: function ( nodes ) {
1140 const tagName = nodes.shift();
1141 const attributes = nodes.shift();
1142 const contents = nodes;
1143 const $element = $( document.createElement( tagName ) ).attr( attributes );
1144 return appendWithoutParsing( $element, contents );
1148 * Transform parsed structure into external link.
1150 * The "href" can be:
1151 * - a jQuery object, treat it as "enclosing" the link text.
1152 * - a function, treat it as the click handler.
1153 * - a string, or our HtmlEmitter jQuery object, treat it as a URI after stringifying.
1155 * TODO: throw an error if nodes.length > 2 ?
1157 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1158 * @return {jQuery}
1160 extlink: function ( nodes ) {
1161 const arg = nodes[ 0 ],
1162 contents = nodes[ 1 ];
1163 let $el;
1164 if ( arg instanceof $ && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1165 $el = arg;
1166 } else {
1167 $el = $( '<a>' );
1168 if ( typeof arg === 'function' ) {
1169 $el.attr( {
1170 role: 'button',
1171 tabindex: 0
1172 } ).on( 'click keypress', function ( e ) {
1173 if (
1174 e.type === 'click' ||
1175 e.type === 'keypress' && e.which === 13
1177 arg.call( this, e );
1179 } );
1180 } else {
1181 const target = textify( arg );
1183 if ( target.search( new RegExp( '^(/|' + mw.config.get( 'wgUrlProtocols' ) + ')' ) ) !== -1 ) {
1184 $el.attr( 'href', target );
1185 if ( target.search( '^' + mw.config.get( 'wgArticlePath' ).replace( /\$1/g, '.+?' ) + '$' ) === -1 ) {
1186 $el.addClass( 'external' );
1188 } else {
1189 mw.log( 'External link in message had illegal target ' + target );
1190 return appendWithoutParsing(
1191 $( '<span>' ),
1192 [ '[' + target + ' ' ].concat( contents ).concat( ']' )
1193 ).contents();
1197 return appendWithoutParsing( $el.empty(), contents );
1201 * Transform formal syntax
1203 * @param {string[]} nodes List of nodes
1204 * @return {string|jQuery} selected (in)formal form according to the current language
1206 '#formal': function ( nodes ) {
1207 const formalityIndex = this.language.getData(
1208 mw.config.get( 'wgUserLanguage' ),
1209 'formalityIndex'
1212 if ( nodes.length === 0 ) {
1213 return '';
1214 } else if ( nodes.length === 1 ) {
1215 return nodes[ 0 ];
1218 return nodes[ formalityIndex ];
1222 * Transform parsed structure into pluralization
1223 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1224 * So convert it back with the current language's convertNumber.
1226 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1227 * @return {string|jQuery} selected pluralized form according to current language
1229 plural: function ( nodes ) {
1230 const explicitPluralForms = {};
1232 const count = parseFloat( this.language.convertNumber( textify( nodes[ 0 ] ), true ) );
1233 let forms = nodes.slice( 1 );
1234 for ( let formIndex = 0; formIndex < forms.length; formIndex++ ) {
1235 const form = forms[ formIndex ];
1237 if ( form instanceof $ && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1238 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1239 const firstChild = form.contents().get( 0 );
1240 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1241 const firstChildText = firstChild.textContent;
1242 if ( /^\d+=/.test( firstChildText ) ) {
1243 const explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1244 // Use the digit part as key and rest of first text node and
1245 // rest of child nodes as value.
1246 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1247 explicitPluralForms[ explicitPluralFormNumber ] = form;
1248 forms[ formIndex ] = undefined;
1251 } else if ( /^\d+=/.test( form ) ) {
1252 // Simple explicit plural forms like 12=a dozen
1253 const explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1254 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1255 forms[ formIndex ] = undefined;
1259 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1260 // eslint-disable-next-line no-jquery/no-map-util
1261 forms = $.map( forms, ( f ) => f );
1263 return this.language.convertPlural( count, forms, explicitPluralForms );
1267 * Transform parsed structure according to gender.
1269 * The first node must be one of:
1270 * - the mw.user object (or a compatible one)
1271 * - an empty string - indicating the current user, same effect as passing the mw.user object
1272 * - a gender string ('male', 'female' or 'unknown')
1274 * @example
1275 * {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}
1277 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1278 * @return {string|jQuery} Selected gender form according to current language
1280 gender: function ( nodes ) {
1281 const forms = nodes.slice( 1 );
1283 let maybeUser = nodes[ 0 ];
1284 if ( maybeUser === '' ) {
1285 maybeUser = mw.user;
1288 let gender;
1289 // If we are passed a mw.user-like object, check their gender.
1290 // Otherwise, assume the gender string itself was passed .
1291 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1292 gender = maybeUser.options.get( 'gender' );
1293 } else {
1294 gender = textify( maybeUser );
1297 return this.language.gender( gender, forms );
1301 * Wraps argument with unicode control characters for directionality safety
1303 * Identical to the implementation in jquery.i18n.emitter.bidi.js
1305 * This solves the problem where directionality-neutral characters at the edge of
1306 * the argument string get interpreted with the wrong directionality from the
1307 * enclosing context, giving renderings that look corrupted like "(Ben_(WMF".
1309 * The wrapping is LRE...PDF or RLE...PDF, depending on the detected
1310 * directionality of the argument string, using the BIDI algorithm's own "First
1311 * strong directional codepoint" rule. Essentially, this works round the fact that
1312 * there is no embedding equivalent of U+2068 FSI (isolation with heuristic
1313 * direction inference). The latter is cleaner but still not widely supported.
1315 * @param {string[]} nodes The text nodes from which to take the first item.
1316 * @return {string} Wrapped String of content as needed.
1318 bidi: function ( nodes ) {
1319 const dir = strongDirFromContent( nodes[ 0 ] );
1320 if ( dir === 'ltr' ) {
1321 // Wrap in LEFT-TO-RIGHT EMBEDDING ... POP DIRECTIONAL FORMATTING
1322 return '\u202A' + nodes[ 0 ] + '\u202C';
1324 if ( dir === 'rtl' ) {
1325 // Wrap in RIGHT-TO-LEFT EMBEDDING ... POP DIRECTIONAL FORMATTING
1326 return '\u202B' + nodes[ 0 ] + '\u202C';
1328 // No strong directionality: do not wrap
1329 return nodes[ 0 ];
1333 * Transform parsed structure into grammar conversion.
1334 * Invoked by putting `{{grammar:form|word}}` in a message
1336 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1337 * @return {string|jQuery} selected grammatical form according to current language
1339 grammar: function ( nodes ) {
1340 const form = nodes[ 0 ],
1341 word = nodes[ 1 ];
1342 // These could be jQuery objects (passed as message parameters),
1343 // in which case we can't transform them (like rawParams() in PHP).
1344 if ( typeof form === 'string' && typeof word === 'string' ) {
1345 return this.language.convertGrammar( word, form );
1347 return word;
1351 * Transform parsed structure into a int: (interface language) message include
1352 * Invoked by putting `{{int:othermessage}}` into a message
1354 * TODO Syntax in the included message is not parsed, this seems like a bug?
1356 * @param {Array} nodes List of nodes
1357 * @return {string} Other message
1359 int: function ( nodes ) {
1360 const msg = textify( nodes[ 0 ] );
1361 return getMessageFunction()( mwString.lcFirst( msg ) );
1365 * Get localized namespace name from canonical name or namespace number.
1366 * Invoked by putting `{{ns:foo}}` into a message
1368 * @param {Array} nodes List of nodes
1369 * @return {string} Localized namespace name
1371 ns: function ( nodes ) {
1372 let ns = textify( nodes[ 0 ] ).trim();
1373 if ( !/^\d+$/.test( ns ) ) {
1374 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1376 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1377 return ns || '';
1381 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1382 * and outputs it in the localized digit script and formatted with decimal
1383 * separator, according to the current language.
1385 * @param {Array} nodes List of nodes
1386 * @return {number|string|jQuery} Formatted number
1388 formatnum: function ( nodes ) {
1389 const isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1390 number = nodes[ 0 ];
1392 // These could be jQuery objects (passed as message parameters),
1393 // in which case we can't transform them (like rawParams() in PHP).
1394 if ( typeof number === 'string' || typeof number === 'number' ) {
1395 return this.language.convertNumber( number, isInteger );
1397 return number;
1401 * Takes a pagename and optional URL queries and returns a full URL to that
1402 * page (with URL queries).
1404 * @param {Array} nodes List of nodes
1405 * @return {string} A URL string
1407 fullurl: function ( nodes ) {
1408 const targetPage = textify( nodes[ 0 ] ),
1409 queryObject = {};
1410 let queryStrings = nodes[ 1 ];
1412 if ( queryStrings ) {
1413 queryStrings = textify( queryStrings );
1414 queryStrings = new URLSearchParams( queryStrings );
1415 for ( const [ key, value ] of queryStrings.entries() ) {
1416 queryObject[ key ] = value;
1420 return mw.config.get( 'wgServer' ) + util.getUrl( targetPage, queryObject );
1424 * Lowercase text
1426 * @param {Array} nodes List of nodes
1427 * @return {string} The given text, all in lowercase
1429 lc: function ( nodes ) {
1430 return textify( nodes[ 0 ] ).toLowerCase();
1434 * Uppercase text
1436 * @param {Array} nodes List of nodes
1437 * @return {string} The given text, all in uppercase
1439 uc: function ( nodes ) {
1440 return textify( nodes[ 0 ] ).toUpperCase();
1444 * Lowercase first letter of input, leaving the rest unchanged
1446 * @param {Array} nodes List of nodes
1447 * @return {string} The given text, with the first character in lowercase
1449 lcfirst: function ( nodes ) {
1450 const text = textify( nodes[ 0 ] );
1451 return mwString.lcFirst( text );
1455 * Uppercase first letter of input, leaving the rest unchanged
1457 * @param {Array} nodes List of nodes
1458 * @return {string} The given text, with the first character in uppercase
1460 ucfirst: function ( nodes ) {
1461 const text = textify( nodes[ 0 ] );
1462 return mwString.ucFirst( text );
/**
 * Provides a {@link jQuery} plugin that parses messages.
 *
 * @module mediawiki.jqueryMsg
 */
/**
 * Parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jQuery elements are preserved. Functions become
 * click handlers for [$1 linktext] links.
 *
 * To use this {@link jQuery} plugin, load the `mediawiki.jqueryMsg` module with {@link mw.loader}.
 *
 * @memberof module:mediawiki.jqueryMsg
 * @param {string} message key
 * @param {...string[]} arguments
 * @example
 * mw.loader.using('mediawiki.jqueryMsg' ).then(() => {
 *   var $userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *   $( 'p#headline' ).msg( 'hello-user', $userlink );
 * } );
 *
 * // N.B. replacements are variadic arguments or an array in second parameter. In other words:
 * somefunction( a, b, c, d )
 * // is equivalent to
 * somefunction( a, [b, c, d] )
 *
 * // Note: We append to 'this', which in a jQuery plugin context will be the selected elements.
 */
$.fn.msg = getPlugin();
// Replace the default message parser with jqueryMsg
const oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function ( format ) {
	// Evaluated lazily so that nothing is inspected for the 'plain' format.
	const needsJqueryMsg = () => (
		// jqueryMsg parser is needed for messages containing wikitext
		/\{\{|[<>[&]/.test( this.map.get( this.key ) ) ||
		// jqueryMsg parser is needed when jQuery objects or DOM nodes are passed in as parameters
		this.parameters.some( ( param ) => param instanceof $ || ( param && param.nodeType !== undefined ) )
	);

	// Fall back to mw.msg's simple parser where possible.
	// Plain text output always uses the simple parser.
	if ( format === 'plain' || !needsJqueryMsg() ) {
		return oldParser.call( this, format );
	}

	// Create the message function for this format on first use and keep it on the map.
	const formatters = this.map;
	if ( !Object.prototype.hasOwnProperty.call( formatters, format ) ) {
		formatters[ format ] = getMessageFunction( {
			messages: formatters,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: format
		} );
	}
	return formatters[ format ]( this.key, this.parameters );
};
/**
 * Parse the message to DOM nodes, rather than HTML string like {@link mw.Message#parse}.
 *
 * This method is only available when jqueryMsg is loaded.
 *
 * @example
 * const msg = mw.message( 'key' );
 * mw.loader.using(`mediawiki.jqueryMsg`).then(() => {
 *   if ( msg.isParseable() ) {
 *     const $node = msg.parseDom();
 *     $node.appendTo('body');
 *   }
 * })
 *
 * @since 1.27
 * @method parseDom
 * @memberof mw.Message.prototype
 * @return {jQuery}
 */
mw.Message.prototype.parseDom = ( () => {
	// Created on the first call and then shared by all messages.
	let cachedParserFn;

	return function () {
		cachedParserFn = cachedParserFn || getFailableParserFn();
		return cachedParserFn( [ this.key, this.parameters ] ).contents();
	};
} )();
/**
 * Check whether the message contains only syntax supported by jqueryMsg.
 *
 * This method is only available when jqueryMsg is loaded.
 *
 * @example
 * const msg = mw.message( 'key' );
 * mw.loader.using(`mediawiki.jqueryMsg`).then(() => {
 *   if ( msg.isParseable() ) {
 *     ...
 *   }
 * })
 *
 * @since 1.41
 * @method isParseable
 * @memberof mw.Message.prototype
 * @return {boolean}
 */
mw.Message.prototype.isParseable = function () {
	const parser = new Parser();
	let parseable = true;
	try {
		parser.parse( this.key, this.parameters );
	} catch ( e ) {
		// Any error thrown while parsing means the message is not parseable.
		parseable = false;
	}
	return parseable;
};
/**
 * Exposes the Parser constructor as `mw.jqueryMsg.Parser`; access to
 * `mw.jqueryMsg` is registered as deprecated through mw.log.deprecate.
 *
 * Can be deleted when MobileFrontend is updated.
 * https://phabricator.wikimedia.org/T354540
 *
 * @private
 */
mw.jqueryMsg = {
	Parser
};
mw.log.deprecate( mw, 'jqueryMsg', mw.jqueryMsg, 'mw.jqueryMsg is a @private library.' );
1591 // Expose for testing purposes only (not a stable API).
1592 module.exports = {
1593 test: {
1594 getMessageFunction,
1595 setMessageFunction,
1596 getParserDefaults,
1597 setParserDefaults,
1598 Parser