2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
15 slice = Array.prototype.slice,
18 PAGENAME: mw.config.get( 'wgPageName' ),
19 PAGENAMEE: mw.util.wikiUrlencode( mw.config.get( 'wgPageName' ) ),
20 SITENAME: mw.config.get( 'wgSiteName' )
22 // Whitelist for allowed HTML elements in wikitext.
23 // Self-closing tags are not currently supported.
24 // Can be populated via setPrivateData().
25 allowedHtmlElements: [],
26 // Attributes allowed on every whitelisted element, regardless of tag name.
27 // See Sanitizer::setupAttributeWhitelist
28 allowedHtmlCommonAttributes: [
41 // Attributes allowed for specific elements.
42 // Key is element name in lower case
43 // Value is array of allowed attributes for that element
44 allowedHtmlAttributesByElement: {},
45 messages: mw.messages,
46 language: mw.language,
48 // Same meaning as in mediawiki.js.
50 // Only 'text', 'parse', and 'escaped' are supported, and the
51 // actual escaping for 'escaped' is done by other code (generally
52 // through mediawiki.js).
54 // However, note that this default only
55 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
56 // is 'text', including when it uses jqueryMsg.
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own htmlEmitter jQuery object is given (recognised by the 'mediaWiki_htmlEmitter'
 * class), its contents are appended in place of the wrapper itself.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The caller's `children` array is never modified; a converted copy is what gets appended.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		// Copy so that converting entries below does not rewrite an array owned by the caller.
		nodes = Array.isArray( children ) ? children.slice() : [ children ];

	for ( i = 0, len = nodes.length; i < len; i++ ) {
		child = nodes[ i ];
		if ( typeof child !== 'object' ) {
			// Strings, numbers, etc. become text nodes so jQuery never parses them as HTML.
			child = document.createTextNode( child );
		}
		if ( child instanceof jQuery && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap our synthetic span and pass its contents along instead.
			child = child.contents();
		}
		nodes[ i ] = child;
	}

	return $parent.append( nodes );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * Handles the numeric entity for the apostrophe (#039) and the named entities
 * quot, lt, gt and amp. Any other entity is left untouched.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	// Entity name (without the leading ampersand and trailing semicolon) to decoded character.
	var decoded = {
		'#039': '\'',
		quot: '"',
		lt: '<',
		gt: '>',
		amp: '&'
	};

	// One pass over the input: text produced by decoding an ampersand entity is never
	// looked at again, so an escaped entity is only unescaped by one level.
	return encoded.replace( /&(#039|quot|lt|gt|amp);/g, function ( match, name ) {
		return decoded[ name ];
	} );
}
/**
 * Turn input into a string.
 *
 * A jQuery object contributes the result of its .text(); any other value is
 * coerced with String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof jQuery ? input.text() : input );
}
124 * Given parser options, return a function that parses a key and replacements, returning jQuery object
126 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
127 * If there was an error parsing, log a warning and return the raw, unparsed message text (wrapped in a jQuery span). This should put the fallback right into
128 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
131 * @param {Object} options Parser options
133 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
134 * @return {jQuery} return.return
// Builds a parse function bound to `options`. The returned function takes an
// arguments-like list and returns whatever parser.parse() produces; on failure it
// logs, tracks the error, and returns a <span> whose text is the fallback value.
136 function getFailableParserFn( options ) {
137 return function ( args ) {
// A fresh parser is constructed on every call of the returned function.
139 // eslint-disable-next-line new-cap
140 parser = new mw.jqueryMsg.parser( options ),
// Replacements may arrive as an array in args[ 1 ] or variadically after the key.
// NOTE(review): the declaration of `key` is not visible in this excerpt; presumably args[ 0 ] - confirm.
142 argsArray = $.isArray( args[ 1 ] ) ? args[ 1 ] : slice.call( args, 1 );
144 return parser.parse( key, argsArray );
// NOTE(review): the try/catch lines are not visible in this excerpt; `e` below is
// presumably the exception thrown by parser.parse() - confirm.
// The fallback is whatever the message store returns for the key, shown as plain text.
146 fallback = parser.settings.messages.get( key );
147 mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + e.message );
148 mw.track( 'mediawiki.jqueryMsg.error', {
150 errorMessage: e.message
// .text() (not .html()) so the fallback is never interpreted as markup.
152 return $( '<span>' ).text( fallback );
160 * Initialize parser defaults.
162 * ResourceLoaderJqueryMsgModule calls this to provide default values from
163 * Sanitizer.php for allowed HTML elements. To override this data for individual
164 * parsers, pass the relevant options to mw.jqueryMsg.parser.
167 * @param {Object} data
// Merges `data` into the shared parserDefaults object in place. Parsers read
// parserDefaults when they are constructed, so only parsers created afterwards see the change.
169 mw.jqueryMsg.setParserDefaults = function ( data ) {
170 $.extend( parserDefaults, data );
174 * Get current parser defaults.
176 * Primarily used for the unit test. Returns a copy.
// Returns a new object carrying the current defaults. $.extend is called without the
// deep flag, so the copy is shallow: nested arrays/objects are shared with parserDefaults.
181 mw.jqueryMsg.getParserDefaults = function () {
182 return $.extend( {}, parserDefaults );
186 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
189 * window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
190 * $( 'p#headline' ).html( gM( 'hello-user', username ) );
192 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
193 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
195 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
196 * somefunction( a, b, c, d )
198 * somefunction( a, [b, c, d] )
200 * @param {Object} options parser options
201 * @return {Function} Function suitable for assigning to window.gM
202 * @return {string} return.key Message key.
203 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
204 * @return {string} return.return Rendered HTML.
// Creates a string-returning message function for the given parser options.
206 mw.jqueryMsg.getMessageFunction = function ( options ) {
207 var failableParserFn, format;
// An explicitly supplied options.format wins; otherwise the parser default is used.
209 if ( options && options.format !== undefined ) {
210 format = options.format;
212 format = parserDefaults.format;
// NOTE(review): the `return function () {` wrapper is not visible in this excerpt;
// the lines below presumably run inside it, with `arguments` being that call's arguments.
// The parse function is created lazily on first use and then reused.
217 if ( !failableParserFn ) {
218 failableParserFn = getFailableParserFn( options );
220 failableResult = failableParserFn( arguments );
// 'text' and 'escaped' formats yield the text content; any other format yields inner HTML.
221 if ( format === 'text' || format === 'escaped' ) {
222 return failableResult.text();
224 return failableResult.html();
230 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
231 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
234 * $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
235 * var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
236 * $( 'p#headline' ).msg( 'hello-user', userlink );
238 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
239 * somefunction( a, b, c, d )
241 * somefunction( a, [b, c, d] )
243 * We append to 'this', which in a jQuery plugin context will be the selected elements.
245 * @param {Object} options Parser options
246 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
247 * @return {string} return.key Message key.
248 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
249 * @return {jQuery} return.return
// Creates a jQuery plugin function that renders a message into the selected elements.
251 mw.jqueryMsg.getPlugin = function ( options ) {
252 var failableParserFn;
// NOTE(review): the `return function () {` wrapper is not visible in this excerpt;
// `this` and `arguments` below presumably belong to the returned plugin function.
// The parse function is created lazily on first use and then reused.
256 if ( !failableParserFn ) {
257 failableParserFn = getFailableParserFn( options );
// Existing children of the target are removed before the parsed nodes are appended.
259 $target = this.empty();
260 appendWithoutParsing( $target, failableParserFn( arguments ) );
267 * Describes an object, whose primary duty is to .parse() message keys.
271 * @param {Object} options
// Parser constructor: combines parserDefaults with per-instance `options` (options win)
// into a fresh settings object, and creates the emitter used by parse().
273 mw.jqueryMsg.parser = function ( options ) {
274 this.settings = $.extend( {}, parserDefaults, options );
// For 'text' and 'escaped' formats wikiTextToAst() uses the curly-brace-only root expression.
275 this.settings.onlyCurlyBraceTransform = ( this.settings.format === 'text' || this.settings.format === 'escaped' );
// NOTE(review): getAst() reads this.astCache, but its initialisation is not visible in this excerpt.
278 // eslint-disable-next-line new-cap
279 this.emitter = new mw.jqueryMsg.htmlEmitter( this.settings.language, this.settings.magic );
282 mw.jqueryMsg.parser.prototype = {
284 * Where the magic happens.
285 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
286 * Errors thrown while parsing or emitting are not caught here; they propagate to the caller.
288 * @param {string} key Message key.
289 * @param {Array} replacements Variable replacements for $1, $2... $n
// Looks up (or builds) the AST for `key` and hands it to the emitter together with
// the replacements. There is no error handling in this method: anything thrown by
// getAst() or emit() reaches the caller.
292 parse: function ( key, replacements ) {
293 var ast = this.getAst( key );
294 return this.emitter.emit( ast, replacements );
298 * Fetch the message string associated with a key, return parsed structure. Memoized.
299 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
301 * @param {string} key
302 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
304 getAst: function ( key ) {
307 if ( !this.astCache.hasOwnProperty( key ) ) {
308 wikiText = this.settings.messages.get( key );
309 if ( typeof wikiText !== 'string' ) {
310 wikiText = '⧼' + key + '⧽';
312 this.astCache[ key ] = this.wikiTextToAst( wikiText );
314 return this.astCache[ key ];
318 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
320 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
321 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
323 * @param {string} input Message string wikitext
325 * @return {Mixed} abstract syntax tree
327 wikiTextToAst: function ( input ) {
329 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
330 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
331 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
332 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
333 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
334 openExtlink, closeExtlink, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
335 templateContents, openTemplate, closeTemplate,
336 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
337 settings = this.settings,
338 concat = Array.prototype.concat;
340 // Indicates current position in input as we parse through it.
341 // Shared among all parsing functions below.
344 // =========================================================
345 // parsing combinators - could be a library on its own
346 // =========================================================
349 * Try parsers until one works, if none work return null
352 * @param {Function[]} ps
353 * @return {string|null}
355 function choice( ps ) {
358 for ( i = 0; i < ps.length; i++ ) {
360 if ( result !== null ) {
369 * Try several ps in a row, all must succeed or return null.
370 * This is the only eager one.
373 * @param {Function[]} ps
374 * @return {string|null}
376 function sequence( ps ) {
380 for ( i = 0; i < ps.length; i++ ) {
382 if ( res === null ) {
392 * Run the same parser over and over until it fails.
393 * Must succeed a minimum of n times or return null.
397 * @param {Function} p
398 * @return {string|null}
400 function nOrMore( n, p ) {
402 var originalPos = pos,
405 while ( parsed !== null ) {
406 result.push( parsed );
409 if ( result.length < n ) {
418 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
420 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
421 * May be some scoping issue
424 * @param {Function} p
425 * @param {Function} fn
426 * @return {string|null}
428 function transform( p, fn ) {
431 return result === null ? null : fn( result );
436 * Just make parsers out of simpler JS builtin types
441 * @return {string} return.return
443 function makeStringParser( s ) {
447 if ( input.substr( pos, len ) === s ) {
456 * Makes a regex parser, given a RegExp object.
457 * The regex being passed in should start with a ^ to anchor it to the start
461 * @param {RegExp} regex anchored regex
462 * @return {Function} function to parse input based on the regex
464 function makeRegexParser( regex ) {
466 var matches = input.slice( pos ).match( regex );
467 if ( matches === null ) {
470 pos += matches[ 0 ].length;
475 // ===================================================================
476 // General patterns above this line -- wikitext specific parsers below
477 // ===================================================================
479 // Parsing functions follow. All parsing functions work like this:
480 // They don't accept any arguments.
481 // Instead, they just operate non destructively on the string 'input'
482 // As they can consume parts of the string, they advance the shared variable pos,
483 // and return tokens (or whatever else they want to return).
484 // some things are defined as closures and other things as ordinary functions
485 // converting everything to a closure makes it a lot harder to debug... errors pop up
486 // but some debuggers can't tell you exactly where they come from. Also the mutually
487 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
488 // This may be because, to save code, memoization was removed
490 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
491 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
492 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
493 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
495 backslash = makeStringParser( '\\' );
496 doubleQuote = makeStringParser( '"' );
497 singleQuote = makeStringParser( '\'' );
498 anyCharacter = makeRegexParser( /^./ );
500 openHtmlStartTag = makeStringParser( '<' );
501 optionalForwardSlash = makeRegexParser( /^\/?/ );
502 openHtmlEndTag = makeStringParser( '</' );
503 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
504 closeHtmlTag = makeRegexParser( /^\s*>/ );
506 function escapedLiteral() {
507 var result = sequence( [
511 return result === null ? null : result[ 1 ];
513 escapedOrLiteralWithoutSpace = choice( [
515 regularLiteralWithoutSpace
517 escapedOrLiteralWithoutBar = choice( [
519 regularLiteralWithoutBar
521 escapedOrRegularLiteral = choice( [
525 // Used to define "literals" without spaces, in space-delimited situations
526 function literalWithoutSpace() {
527 var result = nOrMore( 1, escapedOrLiteralWithoutSpace )();
528 return result === null ? null : result.join( '' );
530 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
531 // it is not a literal in the parameter
532 function literalWithoutBar() {
533 var result = nOrMore( 1, escapedOrLiteralWithoutBar )();
534 return result === null ? null : result.join( '' );
538 var result = nOrMore( 1, escapedOrRegularLiteral )();
539 return result === null ? null : result.join( '' );
542 function curlyBraceTransformExpressionLiteral() {
543 var result = nOrMore( 1, regularLiteralWithSquareBrackets )();
544 return result === null ? null : result.join( '' );
547 asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
548 htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
549 htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
551 whitespace = makeRegexParser( /^\s+/ );
552 dollar = makeStringParser( '$' );
553 digits = makeRegexParser( /^\d+/ );
555 function replacement() {
556 var result = sequence( [
560 if ( result === null ) {
563 return [ 'REPLACE', parseInt( result[ 1 ], 10 ) - 1 ];
565 openExtlink = makeStringParser( '[' );
566 closeExtlink = makeStringParser( ']' );
567 // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
569 var result, parsedResult, target;
571 parsedResult = sequence( [
573 nOrMore( 1, nonWhitespaceExpression ),
575 nOrMore( 1, expression ),
578 if ( parsedResult !== null ) {
579 // When the entire link target is a single parameter, we can't use CONCAT, as we allow
580 // passing fancy parameters (like a whole jQuery object or a function) to use for the
581 // link. Check only if it's a single match, since we can either do CONCAT or not for
582 // singles with the same effect.
583 target = parsedResult[ 1 ].length === 1 ?
584 parsedResult[ 1 ][ 0 ] :
585 [ 'CONCAT' ].concat( parsedResult[ 1 ] );
589 [ 'CONCAT' ].concat( parsedResult[ 3 ] )
594 openWikilink = makeStringParser( '[[' );
595 closeWikilink = makeStringParser( ']]' );
596 pipe = makeStringParser( '|' );
598 function template() {
599 var result = sequence( [
604 return result === null ? null : result[ 1 ];
607 function pipedWikilink() {
608 var result = sequence( [
609 nOrMore( 1, paramExpression ),
611 nOrMore( 1, expression )
613 return result === null ? null : [
614 [ 'CONCAT' ].concat( result[ 0 ] ),
615 [ 'CONCAT' ].concat( result[ 2 ] )
619 function unpipedWikilink() {
620 var result = sequence( [
621 nOrMore( 1, paramExpression )
623 return result === null ? null : [
624 [ 'CONCAT' ].concat( result[ 0 ] )
628 wikilinkContents = choice( [
633 function wikilink() {
634 var result, parsedResult, parsedLinkContents;
637 parsedResult = sequence( [
642 if ( parsedResult !== null ) {
643 parsedLinkContents = parsedResult[ 1 ];
644 result = [ 'WIKILINK' ].concat( parsedLinkContents );
649 // TODO: Support data- if appropriate
650 function doubleQuotedHtmlAttributeValue() {
651 var parsedResult = sequence( [
653 htmlDoubleQuoteAttributeValue,
656 return parsedResult === null ? null : parsedResult[ 1 ];
659 function singleQuotedHtmlAttributeValue() {
660 var parsedResult = sequence( [
662 htmlSingleQuoteAttributeValue,
665 return parsedResult === null ? null : parsedResult[ 1 ];
668 function htmlAttribute() {
669 var parsedResult = sequence( [
671 asciiAlphabetLiteral,
674 doubleQuotedHtmlAttributeValue,
675 singleQuotedHtmlAttributeValue
678 return parsedResult === null ? null : [ parsedResult[ 1 ], parsedResult[ 3 ] ];
682 * Checks if HTML is allowed
684 * @param {string} startTagName HTML start tag name
685 * @param {string} endTagName HTML end tag name
686 * @param {Object} attributes array of consecutive key value pairs,
687 * with index 2 * n being a name and 2 * n + 1 the associated value
688 * @return {boolean} true if this HTML is allowed, false otherwise
690 function isAllowedHtml( startTagName, endTagName, attributes ) {
691 var i, len, attributeName;
693 startTagName = startTagName.toLowerCase();
694 endTagName = endTagName.toLowerCase();
695 if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
699 for ( i = 0, len = attributes.length; i < len; i += 2 ) {
700 attributeName = attributes[ i ];
701 if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
702 $.inArray( attributeName, settings.allowedHtmlAttributesByElement[ startTagName ] || [] ) === -1 ) {
710 function htmlAttributes() {
711 var parsedResult = nOrMore( 0, htmlAttribute )();
712 // Un-nest attributes array due to structure of jQueryMsg operations (see emit).
713 return concat.apply( [ 'HTMLATTRIBUTES' ], parsedResult );
716 // Subset of allowed HTML markup.
717 // Most elements and many attributes allowed on the server are not supported yet.
719 var parsedOpenTagResult, parsedHtmlContents, parsedCloseTagResult,
720 wrappedAttributes, attributes, startTagName, endTagName, startOpenTagPos,
721 startCloseTagPos, endOpenTagPos, endCloseTagPos,
724 // Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
725 // 1. open through closeHtmlTag
727 // 3. openHtmlEnd through close
728 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
730 startOpenTagPos = pos;
731 parsedOpenTagResult = sequence( [
733 asciiAlphabetLiteral,
735 optionalForwardSlash,
739 if ( parsedOpenTagResult === null ) {
744 startTagName = parsedOpenTagResult[ 1 ];
746 parsedHtmlContents = nOrMore( 0, expression )();
748 startCloseTagPos = pos;
749 parsedCloseTagResult = sequence( [
751 asciiAlphabetLiteral,
755 if ( parsedCloseTagResult === null ) {
756 // Closing tag failed. Return the start tag and contents.
757 return [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
758 .concat( parsedHtmlContents );
761 endCloseTagPos = pos;
762 endTagName = parsedCloseTagResult[ 1 ];
763 wrappedAttributes = parsedOpenTagResult[ 2 ];
764 attributes = wrappedAttributes.slice( 1 );
765 if ( isAllowedHtml( startTagName, endTagName, attributes ) ) {
766 result = [ 'HTMLELEMENT', startTagName, wrappedAttributes ]
767 .concat( parsedHtmlContents );
769 // HTML is not allowed, so contents will remain how
770 // it was, while HTML markup at this level will be
772 // E.g. assuming script tags are not allowed:
774 // <script>[[Foo|bar]]</script>
776 // results in '<script>' and '</script>'
777 // (not treated as an HTML tag), surrounding a fully
780 // Concatenate everything from the tag, flattening the contents.
781 result = [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
782 .concat( parsedHtmlContents, input.slice( startCloseTagPos, endCloseTagPos ) );
788 // <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed.
790 var parsedResult, plainText,
793 parsedResult = sequence( [
794 makeStringParser( '<nowiki>' ),
795 // We use a greedy non-backtracking parser, so we must ensure here that we don't take too much
796 makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
797 makeStringParser( '</nowiki>' )
799 if ( parsedResult !== null ) {
800 plainText = parsedResult[ 1 ];
801 result = [ 'CONCAT' ].concat( plainText );
807 templateName = transform(
808 // see $wgLegalTitleChars
809 // not allowing : due to the need to catch "PLURAL:$1"
810 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
811 function ( result ) { return result.toString(); }
813 function templateParam() {
817 nOrMore( 0, paramExpression )
819 if ( result === null ) {
823 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
824 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[ 0 ];
827 function templateWithReplacement() {
828 var result = sequence( [
833 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
835 function templateWithOutReplacement() {
836 var result = sequence( [
841 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
843 function templateWithOutFirstParameter() {
844 var result = sequence( [
848 return result === null ? null : [ result[ 0 ], '' ];
850 colon = makeStringParser( ':' );
851 templateContents = choice( [
853 var res = sequence( [
854 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
855 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
856 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
857 nOrMore( 0, templateParam )
859 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
862 var res = sequence( [
864 nOrMore( 0, templateParam )
866 if ( res === null ) {
869 return [ res[ 0 ] ].concat( res[ 1 ] );
872 openTemplate = makeStringParser( '{{' );
873 closeTemplate = makeStringParser( '}}' );
874 nonWhitespaceExpression = choice( [
881 paramExpression = choice( [
889 expression = choice( [
899 // Used when only {{-transformation is wanted, for 'text'
900 // or 'escaped' formats
901 curlyBraceTransformExpression = choice( [
904 curlyBraceTransformExpressionLiteral
910 * @param {Function} rootExpression Root parse function
911 * @return {Array|null}
913 function start( rootExpression ) {
914 var result = nOrMore( 0, rootExpression )();
915 if ( result === null ) {
918 return [ 'CONCAT' ].concat( result );
920 // everything above this point is supposed to be stateless/static, but
921 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
922 // finally let's do some actual work...
924 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
927 * For success, the p must have gotten to the end of the input
928 * and returned a non-null.
929 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
931 if ( result === null || pos !== input.length ) {
932 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
940 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
942 * @param {Object} language
943 * @param {Object} magic
// Emitter constructor: stores `language`, installs one method per entry of `magic`
// under the lower-cased key, and defines emit() directly on the instance.
// NOTE(review): the declaration of `jmsg` is not visible in this excerpt; presumably an alias of `this` - confirm.
945 mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
947 this.language = language;
948 $.each( magic, function ( key, val ) {
949 jmsg[ key.toLowerCase() ] = function () {
955 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
956 * Walk entire node structure, applying replacements and template functions when appropriate
958 * @param {Mixed} node Abstract syntax tree (top node or subnode)
959 * @param {Array} replacements for $1, $2, ... $n
960 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
962 this.emit = function ( node, replacements ) {
963 var ret, subnodes, operation,
// NOTE(review): the individual `case` labels of this switch are not visible in this excerpt.
965 switch ( typeof node ) {
970 // typeof returns object for arrays
972 // node is an array of nodes
// Children (everything after node[ 0 ]) are emitted first, so operations receive already-emitted subnodes.
973 subnodes = $.map( node.slice( 1 ), function ( n ) {
974 return jmsg.emit( n, replacements );
// node[ 0 ] names the operation; it is dispatched to the same-named lower-case method on jmsg.
976 operation = node[ 0 ].toLowerCase();
977 if ( typeof jmsg[ operation ] === 'function' ) {
978 ret = jmsg[ operation ]( subnodes, replacements );
980 throw new Error( 'Unknown operation "' + operation + '"' );
984 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
985 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
986 // The logical thing is probably to return the empty string here when we encounter undefined.
990 throw new Error( 'Unexpected type in AST: ' + typeof node );
996 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
997 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
998 // If you have 'magic words' then configure the parser to have them upon creation.
1000 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
1001 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
1002 mw.jqueryMsg.htmlEmitter.prototype = {
1004 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1005 * Must return a single node to parents -- a jQuery with synthetic span
1006 * However, unwrap any other synthetic spans in our children and pass them upwards
1008 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
// Collects all nodes into one synthetic <span>. The 'mediaWiki_htmlEmitter' class is the
// marker appendWithoutParsing() checks to unwrap such spans again further up the tree.
1011 concat: function ( nodes ) {
1012 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1013 $.each( nodes, function ( i, node ) {
1014 // Let jQuery append nodes, arrays of nodes and jQuery objects
1015 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1016 appendWithoutParsing( $span, node );
// NOTE(review): the return statement is not visible in this excerpt; presumably $span is returned.
1022 * Return escaped replacement of correct index, or string if unavailable.
1023 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1024 * if the specified parameter is not found return the same string
1025 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1027 * TODO: Throw error if nodes.length > 1 ?
1029 * @param {Array} nodes List of one element, integer, n >= 0
1030 * @param {Array} replacements List of at least n strings
1031 * @return {string} replacement
1033 replace: function ( nodes, replacements ) {
1034 var index = parseInt( nodes[ 0 ], 10 );
1036 if ( index < replacements.length ) {
1037 return replacements[ index ];
1039 // index not found, fallback to displaying variable
1040 return '$' + ( index + 1 );
1045 * Transform wiki-link
1048 * It only handles basic cases, either no pipe, or a pipe with an explicit
1051 * It does not attempt to handle features like the pipe trick.
1052 * However, the pipe trick should usually not be present in wikitext retrieved
1053 * from the server, since the replacement is done at save time.
1054 * It may, though, if the wikitext appears in extension-controlled content.
1056 * @param {string[]} nodes
// Builds an <a> element for a wiki link. nodes[ 0 ] is the page; nodes[ 1 ], if present,
// is the anchor content.
1059 wikilink: function ( nodes ) {
1060 var page, anchor, url, $el;
// The page may be a jQuery object; textify() reduces it to plain text.
1062 page = textify( nodes[ 0 ] );
1063 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1064 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1065 if ( page.charAt( 0 ) === ':' ) {
1066 page = page.slice( 1 );
1068 url = mw.util.getUrl( page );
1070 if ( nodes.length === 1 ) {
1071 // [[Some Page]] or [[Namespace:Some Page]]
// NOTE(review): the anchor assignment for this branch is not visible in this excerpt.
1074 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1075 anchor = nodes[ 1 ];
// NOTE(review): the attribute map passed to .attr() is not visible in this excerpt.
1078 $el = $( '<a>' ).attr( {
// Appended via appendWithoutParsing so a string anchor becomes a text node, not parsed HTML.
1082 return appendWithoutParsing( $el, anchor );
1086 * Converts array of HTML element key value pairs to object
1088 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1089 * name and 2 * n + 1 the associated value
1090 * @return {Object} Object mapping attribute name to attribute value
// Folds the flat [ name, value, name, value, ... ] list into an object.
1092 htmlattributes: function ( nodes ) {
1093 var i, len, mapping = {};
// Step by two: even indexes are names (used as-is), odd indexes are values (entity-decoded).
1094 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1095 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
// NOTE(review): the return statement is not visible in this excerpt; presumably `mapping` is returned.
1101 * Handles an (already-validated) HTML element.
1103 * @param {Array} nodes Nodes to process when creating element
1104 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
// Creates the DOM element for an HTMLELEMENT node and fills it with its contents.
1106 htmlelement: function ( nodes ) {
1107 var tagName, attributes, contents, $element;
// shift() removes the first two entries from `nodes` itself: tag name, then attributes.
1109 tagName = nodes.shift();
1110 attributes = nodes.shift();
// NOTE(review): the assignment of `contents` is not visible in this excerpt; presumably the remaining nodes - confirm.
// The element is created via document.createElement rather than by parsing an HTML string.
1112 $element = $( document.createElement( tagName ) ).attr( attributes );
1113 return appendWithoutParsing( $element, contents );
1117 * Transform parsed structure into external link.
1119 * The "href" can be:
1120 * - a jQuery object, treat it as "enclosing" the link text.
1121 * - a function, treat it as the click handler.
1122 * - a string, or our htmlEmitter jQuery object, treat it as a URI after stringifying.
1124 * TODO: throw an error if nodes.length > 2 ?
1126 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
// Builds the element for an external link; the handling depends on the type of the target `arg`.
// NOTE(review): the declarations of `$el` and `arg`, and the bodies that create `$el`,
// are not visible in this excerpt.
1129 extlink: function ( nodes ) {
1132 contents = nodes[ 1 ];
// A caller-supplied jQuery object (i.e. one without our synthetic-span marker class).
1133 if ( arg instanceof jQuery && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
// A function target acts as the activation handler.
1137 if ( typeof arg === 'function' ) {
1142 .on( 'click keypress', function ( e ) {
// && binds tighter than ||: fire on any click, or on a keypress whose key code is 13.
1144 e.type === 'click' ||
1145 e.type === 'keypress' && e.which === 13
// The handler runs with the element as `this` and receives the event.
1147 arg.call( this, e );
// Anything else is stringified and used as the href.
1151 $el.attr( 'href', textify( arg ) );
// Existing children of $el are removed before the link contents are appended.
1154 return appendWithoutParsing( $el.empty(), contents );
1158 * Transform parsed structure into pluralization
1159 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1160 * So convert it back with the current language's convertNumber.
1162 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1163 * @return {string} selected pluralized form according to current language
// Selects a plural form. nodes[ 0 ] is the count, the remaining nodes are the forms.
// Forms starting with "N=" are separated out as explicit forms keyed by N.
1165 plural: function ( nodes ) {
1166 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1167 explicitPluralForms = {};
// NOTE(review): the `true` flag presumably asks convertNumber to convert localized digits back - confirm against mw.language.
1169 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1170 forms = nodes.slice( 1 );
1171 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1172 form = forms[ formIndex ];
1174 if ( form instanceof jQuery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1175 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
// Only the first child is inspected, and only if it is a text node.
1176 firstChild = form.contents().get( 0 );
1177 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1178 firstChildText = firstChild.textContent;
1179 if ( /^\d+=/.test( firstChildText ) ) {
1180 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1181 // Use the digit part as key and rest of first text node and
1182 // rest of child nodes as value.
// The text node is rewritten in place to drop the "N=" prefix.
1183 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1184 explicitPluralForms[ explicitPluralFormNumber ] = form;
// Mark the slot so it can be filtered out of the regular forms below.
1185 forms[ formIndex ] = undefined;
1188 } else if ( /^\d+=/.test( form ) ) {
1189 // Simple explicit plural forms like 12=a dozen
1190 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1191 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1192 forms[ formIndex ] = undefined;
1196 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
// NOTE(review): the callback body is not visible in this excerpt.
1197 forms = $.map( forms, function ( form ) {
1201 return this.language.convertPlural( count, forms, explicitPluralForms );
1205 * Transform parsed structure according to gender.
1207 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1209 * The first node must be one of:
1210 * - the mw.user object (or a compatible one)
1211 * - an empty string - indicating the current user, same effect as passing the mw.user object
1212 * - a gender string ('male', 'female' or 'unknown')
1214 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1215 * @return {string} Selected gender form according to current language
1217 gender: function ( nodes ) {
1219 maybeUser = nodes[ 0 ],
1220 forms = nodes.slice( 1 );
1222 if ( maybeUser === '' ) {
1223 maybeUser = mw.user;
1226 // If we are passed a mw.user-like object, check their gender.
1227 // Otherwise, assume the gender string itself was passed.
1228 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1229 gender = maybeUser.options.get( 'gender' );
1234 return this.language.gender( gender, forms );
1238 * Transform parsed structure into grammar conversion.
1239 * Invoked by putting `{{grammar:form|word}}` in a message
1241 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1242 * @return {string} selected grammatical form according to current language
1244 grammar: function ( nodes ) {
1245 var form = nodes[ 0 ],
1247 return word && form && this.language.convertGrammar( word, form );
1251 * Transform parsed structure into an int: (interface language) message include
1252 * Invoked by putting `{{int:othermessage}}` into a message
1254 * @param {Array} nodes List of nodes
1255 * @return {string} Other message
1257 'int': function ( nodes ) {
1258 var msg = nodes[ 0 ];
1259 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1263 * Get localized namespace name from canonical name or namespace number.
1264 * Invoked by putting `{{ns:foo}}` into a message
1266 * @param {Array} nodes List of nodes
1267 * @return {string} Localized namespace name
1269 ns: function ( nodes ) {
1270 var ns = $.trim( textify( nodes[ 0 ] ) );
1271 if ( !/^\d+$/.test( ns ) ) {
1272 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1274 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1279 * Takes an unformatted number (Arabic digits, no group separators, and "." as decimal separator)
1280 * and outputs it in the localized digit script and formatted with decimal
1281 * separator, according to the current language.
1283 * @param {Array} nodes List of nodes
1284 * @return {number|string} Formatted number
1286 formatnum: function ( nodes ) {
1287 var isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1288 number = nodes[ 0 ];
1290 return this.language.convertNumber( number, isInteger );
1296 * @param {Array} nodes List of nodes
1297 * @return {string} The given text, all in lowercase
1299 lc: function ( nodes ) {
1300 return textify( nodes[ 0 ] ).toLowerCase();
1306 * @param {Array} nodes List of nodes
1307 * @return {string} The given text, all in uppercase
1309 uc: function ( nodes ) {
1310 return textify( nodes[ 0 ] ).toUpperCase();
1314 * Lowercase first letter of input, leaving the rest unchanged
1316 * @param {Array} nodes List of nodes
1317 * @return {string} The given text, with the first character in lowercase
1319 lcfirst: function ( nodes ) {
1320 var text = textify( nodes[ 0 ] );
1321 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1325 * Uppercase first letter of input, leaving the rest unchanged
1327 * @param {Array} nodes List of nodes
1328 * @return {string} The given text, with the first character in uppercase
1330 ucfirst: function ( nodes ) {
1331 var text = textify( nodes[ 0 ] );
1332 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1336 // Deprecated! don't rely on gM existing.
1337 // The window.gM ought not to be required - or if required, not required here.
1338 // But moving it to extensions breaks it (?!)
1339 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
1340 // @deprecated since 1.23
1341 mw.log.deprecate( window, 'gM', mw.jqueryMsg.getMessageFunction(), 'Use mw.message( ... ).parse() instead.' );
1346 * @see mw.jqueryMsg#getPlugin
1348 $.fn.msg = mw.jqueryMsg.getPlugin();
1350 // Replace the default message parser with jqueryMsg
1351 oldParser = mw.Message.prototype.parser;
1352 mw.Message.prototype.parser = function () {
1353 if ( this.format === 'plain' || !/\{\{|[\[<>&]/.test( this.map.get( this.key ) ) ) {
1354 // Fall back to mw.msg's simple parser
1355 return oldParser.apply( this );
1358 if ( !this.map.hasOwnProperty( this.format ) ) {
1359 this.map[ this.format ] = mw.jqueryMsg.getMessageFunction( {
1361 // For format 'escaped', escaping part is handled by mediawiki.js
1365 return this.map[ this.format ]( this.key, this.parameters );
1369 * Parse the message to DOM nodes, rather than HTML string like #parse.
1371 * This method is only available when jqueryMsg is loaded.
1375 * @member mw.Message
1378 mw.Message.prototype.parseDom = ( function () {
1379 var reusableParent = $( '<div>' );
1380 return function () {
1381 return reusableParent.msg( this.key, this.parameters ).contents().detach();
1385 }( mediaWiki, jQuery ) );