2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
15 slice = Array.prototype.slice,
18 SITENAME: mw.config.get( 'wgSiteName' )
20 // Whitelist for allowed HTML elements in wikitext.
21 // Self-closing tags are not currently supported.
22 // Can be populated via setPrivateData().
23 allowedHtmlElements: [],
24 // Key tag name, value allowed attributes for that tag.
25 // See Sanitizer::setupAttributeWhitelist
26 allowedHtmlCommonAttributes: [
39 // Attributes allowed for specific elements.
40 // Key is element name in lower case
41 // Value is array of allowed attributes for that element
42 allowedHtmlAttributesByElement: {},
43 messages: mw.messages,
44 language: mw.language,
46 // Same meaning as in mediawiki.js.
48 // Only 'text', 'parse', and 'escaped' are supported, and the
49 // actual escaping for 'escaped' is done by other code (generally
50 // through mediawiki.js).
52 // However, note that this default only
53 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
54 // is 'text', including when it uses jqueryMsg.
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The array passed by the caller is never modified; the converted nodes are collected
 * in a fresh array.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		list = $.isArray( children ) ? children : [ children ],
		nodes = [];

	for ( i = 0, len = list.length; i < len; i++ ) {
		child = list[ i ];
		// Anything that is not an object (string, number, ...) becomes a text node,
		// so that it can never be interpreted as markup by append().
		nodes.push( typeof child === 'object' ? child : document.createTextNode( child ) );
	}

	return $parent.append( nodes );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * The ampersand entity is decoded last, so an input such as `&amp;lt;` yields
 * `&lt;` instead of being decoded a second time.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	return encoded
		.replace( /&#039;/g, '\'' )
		.replace( /&quot;/g, '"' )
		.replace( /&lt;/g, '<' )
		.replace( /&gt;/g, '>' )
		.replace( /&amp;/g, '&' );
}
/**
 * Given parser options, return a function that parses a key and replacements, returning jQuery object
 *
 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
 * If there was an error parsing, return the raw message for the key (wrapped in a jQuery span) and log a
 * warning with the key and the error message. This should put the problem right into the interface,
 * without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	var parser = new mw.jqueryMsg.parser( options );

	return function ( args ) {
		var fallback,
			key = args[ 0 ],
			// Replacements are either one array in the second slot or the remaining arguments.
			argsArray = $.isArray( args[ 1 ] ) ? args[ 1 ] : slice.call( args, 1 );

		try {
			return parser.parse( key, argsArray );
		} catch ( e ) {
			fallback = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + e.message );
			return $( '<span>' ).text( fallback );
		}
	};
}
/**
 * Initialize parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. To override this data for individual
 * parsers, pass the relevant options to mw.jqueryMsg.parser.
 *
 * Only `allowedHtmlElements` is read from the given data; a missing or empty
 * value leaves the current default untouched.
 *
 * @private
 * @param {Object} data
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	if ( data && data.allowedHtmlElements ) {
		parserDefaults.allowedHtmlElements = data.allowedHtmlElements;
	}
};
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn = getFailableParserFn( options ),
		// An explicitly passed format wins over the parser default.
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format;

	return function () {
		var failableResult = failableParserFn( arguments );

		// For 'text' and 'escaped' the plain text of the result is returned;
		// every other format yields the HTML serialization.
		if ( format === 'text' || format === 'escaped' ) {
			return failableResult.text();
		}
		return failableResult.html();
	};
};
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *        $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *        var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *        $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * We append to 'this', which in a jQuery plugin context will be the selected elements.
 * The selected elements are emptied before the parsed message is appended.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn = getFailableParserFn( options );

	return function () {
		var $target = this.empty();
		// TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
		// or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
		$.each( failableParserFn( arguments ).contents(), function ( i, node ) {
			appendWithoutParsing( $target, node );
		} );
		return $target;
	};
};
/**
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The given options are layered over the parser defaults. When the resulting
 * format is 'text' or 'escaped', only curly brace ({{...}}) transformation is enabled.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options );

	settings.onlyCurlyBraceTransform = ( settings.format === 'text' || settings.format === 'escaped' );

	this.settings = settings;
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
242 mw.jqueryMsg.parser.prototype = {
244 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
246 * In most cases, the message is a string so this is identical.
247 * (This is why we would like to move this functionality server-side).
249 * The two parts of the key are separated by colon. For example:
251 * "message-key:true": ast
 * if the key is "message-key" and onlyCurlyBraceTransform is true.
255 * This cache is shared by all instances of mw.jqueryMsg.parser.
257 * NOTE: We promise, it's static - when you create this empty object
258 * in the prototype, each new instance of the class gets a reference
259 * to the same object.
267 * Where the magic happens.
268 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
269 * If an error is thrown, returns original key, and logs the error
271 * @param {string} key Message key.
272 * @param {Array} replacements Variable replacements for $1, $2... $n
275 parse: function ( key, replacements ) {
276 return this.emitter.emit( this.getAst( key ), replacements );
280 * Fetch the message string associated with a key, return parsed structure. Memoized.
281 * Note that we pass '[' + key + ']' back for a missing message here.
283 * @param {string} key
284 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
286 getAst: function ( key ) {
288 cacheKey = [ key, this.settings.onlyCurlyBraceTransform ].join( ':' );
290 if ( this.astCache[ cacheKey ] === undefined ) {
291 wikiText = this.settings.messages.get( key );
292 if ( typeof wikiText !== 'string' ) {
293 wikiText = '\\[' + key + '\\]';
295 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
297 return this.astCache[ cacheKey ];
301 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
303 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
304 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
306 * @param {string} input Message string wikitext
308 * @return {Mixed} abstract syntax tree
310 wikiTextToAst: function ( input ) {
312 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
313 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
314 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
315 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
316 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
317 openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
318 templateContents, openTemplate, closeTemplate,
319 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
320 settings = this.settings,
321 concat = Array.prototype.concat;
323 // Indicates current position in input as we parse through it.
324 // Shared among all parsing functions below.
327 // =========================================================
328 // parsing combinators - could be a library on its own
329 // =========================================================
/**
 * Try parsers until one works, if none work return null
 *
 * Parsers are tried in the given order; the first non-null result wins and
 * the remaining parsers are not run.
 *
 * @private
 * @param {Function[]} ps
 * @return {Function} Parser returning the first successful result, or null
 */
function choice( ps ) {
	return function () {
		var i, result;
		for ( i = 0; i < ps.length; i++ ) {
			result = ps[ i ]();
			if ( result !== null ) {
				return result;
			}
		}
		return null;
	};
}
/**
 * Try several ps in a row, all must succeed or return null.
 * This is the only eager one.
 *
 * Unlike the other combinators this runs immediately instead of returning a
 * parser. On failure the shared position is reset to where it was on entry.
 *
 * @private
 * @param {Function[]} ps
 * @return {Array|null} Results of all parsers in order, or null if any of them failed
 */
function sequence( ps ) {
	var i, res,
		originalPos = pos,
		result = [];

	for ( i = 0; i < ps.length; i++ ) {
		res = ps[ i ]();
		if ( res === null ) {
			// Undo whatever the earlier parsers consumed.
			pos = originalPos;
			return null;
		}
		result.push( res );
	}
	return result;
}
/**
 * Run the same parser over and over until it fails.
 * Must succeed a minimum of n times or return null.
 *
 * If the parser succeeds without advancing the shared position, that result is
 * recorded once and the repetition stops; repeating it could never terminate.
 *
 * @private
 * @param {number} n
 * @param {Function} p
 * @return {Function} Parser returning the array of results, or null on fewer than n successes
 */
function nOrMore( n, p ) {
	return function () {
		var originalPos = pos,
			lastPos = pos,
			result = [],
			parsed = p();

		while ( parsed !== null ) {
			result.push( parsed );
			if ( pos === lastPos ) {
				// No input consumed: another round would succeed identically forever.
				break;
			}
			lastPos = pos;
			parsed = p();
		}
		if ( result.length < n ) {
			pos = originalPos;
			return null;
		}
		return result;
	};
}
/**
 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p
 * @param {Function} fn
 * @return {Function} Parser returning fn( result of p ), or null when p fails
 */
function transform( p, fn ) {
	return function () {
		var result = p();
		return result === null ? null : fn( result );
	};
}
/**
 * Just make parsers out of simpler JS builtin types
 *
 * The returned parser succeeds when the input contains exactly `s` at the
 * current position, and then advances the position past it.
 *
 * @private
 * @param {string} s
 * @return {Function}
 * @return {string} return.return
 */
function makeStringParser( s ) {
	var len = s.length;

	return function () {
		var result = null;
		if ( input.slice( pos, pos + len ) === s ) {
			result = s;
			pos += len;
		}
		return result;
	};
}
/**
 * Makes a regex parser, given a RegExp object.
 * The regex being passed in should start with a ^ to anchor it to the start
 * of the remaining input.
 *
 * A match that does not begin at the current position is treated as a failure,
 * so an unanchored regex cannot advance the position past unmatched characters.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 */
function makeRegexParser( regex ) {
	return function () {
		var matches = input.slice( pos ).match( regex );
		if ( matches === null || matches.index !== 0 ) {
			return null;
		}
		pos += matches[ 0 ].length;
		return matches[ 0 ];
	};
}
458 // ===================================================================
459 // General patterns above this line -- wikitext specific parsers below
460 // ===================================================================
462 // Parsing functions follow. All parsing functions work like this:
463 // They don't accept any arguments.
464 // Instead, they just operate non destructively on the string 'input'
465 // As they can consume parts of the string, they advance the shared variable pos,
466 // and return tokens (or whatever else they want to return).
467 // some things are defined as closures and other things as ordinary functions
468 // converting everything to a closure makes it a lot harder to debug... errors pop up
469 // but some debuggers can't tell you exactly where they come from. Also the mutually
470 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
471 // This may be because, to save code, memoization was removed
473 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
474 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
475 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
476 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
478 backslash = makeStringParser( '\\' );
479 doubleQuote = makeStringParser( '"' );
480 singleQuote = makeStringParser( '\'' );
481 anyCharacter = makeRegexParser( /^./ );
483 openHtmlStartTag = makeStringParser( '<' );
484 optionalForwardSlash = makeRegexParser( /^\/?/ );
485 openHtmlEndTag = makeStringParser( '</' );
486 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
487 closeHtmlTag = makeRegexParser( /^\s*>/ );
489 function escapedLiteral() {
490 var result = sequence( [
494 return result === null ? null : result[ 1 ];
496 escapedOrLiteralWithoutSpace = choice( [
498 regularLiteralWithoutSpace
500 escapedOrLiteralWithoutBar = choice( [
502 regularLiteralWithoutBar
504 escapedOrRegularLiteral = choice( [
508 // Used to define "literals" without spaces, in space-delimited situations
509 function literalWithoutSpace() {
510 var result = nOrMore( 1, escapedOrLiteralWithoutSpace )();
511 return result === null ? null : result.join( '' );
// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter
515 function literalWithoutBar() {
516 var result = nOrMore( 1, escapedOrLiteralWithoutBar )();
517 return result === null ? null : result.join( '' );
520 // Used for wikilink page names. Like literalWithoutBar, but
521 // without allowing escapes.
522 function unescapedLiteralWithoutBar() {
523 var result = nOrMore( 1, regularLiteralWithoutBar )();
524 return result === null ? null : result.join( '' );
528 var result = nOrMore( 1, escapedOrRegularLiteral )();
529 return result === null ? null : result.join( '' );
532 function curlyBraceTransformExpressionLiteral() {
533 var result = nOrMore( 1, regularLiteralWithSquareBrackets )();
534 return result === null ? null : result.join( '' );
// Anchored with ^ like every other regex parser: the letters must start at the
// current position, otherwise pos would be advanced by the length of a match
// found further along in the input.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
538 htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
539 htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
541 whitespace = makeRegexParser( /^\s+/ );
542 dollar = makeStringParser( '$' );
543 digits = makeRegexParser( /^\d+/ );
// Parses a "$n" placeholder into [ 'REPLACE', n - 1 ], i.e. a zero-based
// index into the replacements. Returns null if there is no placeholder here.
function replacement() {
	var result = sequence( [
		dollar,
		digits
	] );
	if ( result === null ) {
		return null;
	}
	return [ 'REPLACE', parseInt( result[ 1 ], 10 ) - 1 ];
}
555 openExtlink = makeStringParser( '[' );
556 closeExtlink = makeStringParser( ']' );
557 // this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
559 var result, parsedResult;
561 parsedResult = sequence( [
563 nonWhitespaceExpression,
565 nOrMore( 1, expression ),
568 if ( parsedResult !== null ) {
569 result = [ 'EXTLINK', parsedResult[ 1 ] ];
570 // TODO (mattflaschen, 2013-03-22): Clean this up if possible.
571 // It's avoiding CONCAT for single nodes, so they at least doesn't get the htmlEmitter span.
572 if ( parsedResult[ 3 ].length === 1 ) {
573 result.push( parsedResult[ 3 ][ 0 ] );
575 result.push( [ 'CONCAT' ].concat( parsedResult[ 3 ] ) );
580 // this is the same as the above extlink, except that the url is being passed on as a parameter
581 function extLinkParam() {
582 var result = sequence( [
590 if ( result === null ) {
593 return [ 'EXTLINKPARAM', parseInt( result[ 2 ], 10 ) - 1, result[ 4 ] ];
595 openWikilink = makeStringParser( '[[' );
596 closeWikilink = makeStringParser( ']]' );
597 pipe = makeStringParser( '|' );
599 function template() {
600 var result = sequence( [
605 return result === null ? null : result[ 1 ];
608 wikilinkPage = choice( [
609 unescapedLiteralWithoutBar,
613 function pipedWikilink() {
614 var result = sequence( [
619 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
622 wikilinkContents = choice( [
624 wikilinkPage // unpiped link
627 function wikilink() {
628 var result, parsedResult, parsedLinkContents;
631 parsedResult = sequence( [
636 if ( parsedResult !== null ) {
637 parsedLinkContents = parsedResult[ 1 ];
638 result = [ 'WIKILINK' ].concat( parsedLinkContents );
643 // TODO: Support data- if appropriate
644 function doubleQuotedHtmlAttributeValue() {
645 var parsedResult = sequence( [
647 htmlDoubleQuoteAttributeValue,
650 return parsedResult === null ? null : parsedResult[ 1 ];
653 function singleQuotedHtmlAttributeValue() {
654 var parsedResult = sequence( [
656 htmlSingleQuoteAttributeValue,
659 return parsedResult === null ? null : parsedResult[ 1 ];
662 function htmlAttribute() {
663 var parsedResult = sequence( [
665 asciiAlphabetLiteral,
668 doubleQuotedHtmlAttributeValue,
669 singleQuotedHtmlAttributeValue
672 return parsedResult === null ? null : [ parsedResult[ 1 ], parsedResult[ 3 ] ];
/**
 * Checks if HTML is allowed
 *
 * The element is allowed when the start and end tag names match (ignoring case),
 * the name is in settings.allowedHtmlElements, and every attribute name is
 * either a common allowed attribute or allowed for that specific element.
 * Attribute names are compared as given, without case folding.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Object} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var i, len, attributeName;

	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();
	if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; the odd indexes hold the values.
	for ( i = 0, len = attributes.length; i < len; i += 2 ) {
		attributeName = attributes[ i ];
		if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
			$.inArray( attributeName, settings.allowedHtmlAttributesByElement[ startTagName ] || [] ) === -1 ) {
			return false;
		}
	}

	return true;
}
// Parses zero or more HTML attributes into a single flat node:
// [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ]
function htmlAttributes() {
	var parsedResult = nOrMore( 0, htmlAttribute )();
	// Un-nest attributes array due to structure of jQueryMsg operations (see emit).
	return concat.apply( [ 'HTMLATTRIBUTES' ], parsedResult );
}
710 // Subset of allowed HTML markup.
711 // Most elements and many attributes allowed on the server are not supported yet.
713 var parsedOpenTagResult, parsedHtmlContents, parsedCloseTagResult,
714 wrappedAttributes, attributes, startTagName, endTagName, startOpenTagPos,
715 startCloseTagPos, endOpenTagPos, endCloseTagPos,
718 // Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
719 // 1. open through closeHtmlTag
721 // 3. openHtmlEnd through close
722 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
724 startOpenTagPos = pos;
725 parsedOpenTagResult = sequence( [
727 asciiAlphabetLiteral,
729 optionalForwardSlash,
733 if ( parsedOpenTagResult === null ) {
738 startTagName = parsedOpenTagResult[ 1 ];
740 parsedHtmlContents = nOrMore( 0, expression )();
742 startCloseTagPos = pos;
743 parsedCloseTagResult = sequence( [
745 asciiAlphabetLiteral,
749 if ( parsedCloseTagResult === null ) {
750 // Closing tag failed. Return the start tag and contents.
751 return [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
752 .concat( parsedHtmlContents );
755 endCloseTagPos = pos;
756 endTagName = parsedCloseTagResult[ 1 ];
757 wrappedAttributes = parsedOpenTagResult[ 2 ];
758 attributes = wrappedAttributes.slice( 1 );
759 if ( isAllowedHtml( startTagName, endTagName, attributes ) ) {
760 result = [ 'HTMLELEMENT', startTagName, wrappedAttributes ]
761 .concat( parsedHtmlContents );
763 // HTML is not allowed, so contents will remain how
764 // it was, while HTML markup at this level will be
766 // E.g. assuming script tags are not allowed:
768 // <script>[[Foo|bar]]</script>
770 // results in '<script>' and '</script>'
771 // (not treated as an HTML tag), surrounding a fully
774 // Concatenate everything from the tag, flattening the contents.
775 result = [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
776 .concat( parsedHtmlContents, input.slice( startCloseTagPos, endCloseTagPos ) );
782 templateName = transform(
783 // see $wgLegalTitleChars
784 // not allowing : due to the need to catch "PLURAL:$1"
785 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
786 function ( result ) { return result.toString(); }
788 function templateParam() {
792 nOrMore( 0, paramExpression )
794 if ( result === null ) {
798 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
799 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[ 0 ];
802 function templateWithReplacement() {
803 var result = sequence( [
808 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
810 function templateWithOutReplacement() {
811 var result = sequence( [
816 return result === null ? null : [ result[ 0 ], result[ 2 ] ];
818 function templateWithOutFirstParameter() {
819 var result = sequence( [
823 return result === null ? null : [ result[ 0 ], '' ];
825 colon = makeStringParser( ':' );
826 templateContents = choice( [
828 var res = sequence( [
829 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
830 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
831 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
832 nOrMore( 0, templateParam )
834 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
837 var res = sequence( [
839 nOrMore( 0, templateParam )
841 if ( res === null ) {
844 return [ res[ 0 ] ].concat( res[ 1 ] );
847 openTemplate = makeStringParser( '{{' );
848 closeTemplate = makeStringParser( '}}' );
849 nonWhitespaceExpression = choice( [
857 paramExpression = choice( [
866 expression = choice( [
876 // Used when only {{-transformation is wanted, for 'text'
877 // or 'escaped' formats
878 curlyBraceTransformExpression = choice( [
881 curlyBraceTransformExpressionLiteral
887 * @param {Function} rootExpression root parse function
889 function start( rootExpression ) {
890 var result = nOrMore( 0, rootExpression )();
891 if ( result === null ) {
894 return [ 'CONCAT' ].concat( result );
896 // everything above this point is supposed to be stateless/static, but
897 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
898 // finally let's do some actual work...
900 // If you add another possible rootExpression, you must update the astCache key scheme.
901 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
904 * For success, the p must have gotten to the end of the input
905 * and returned a non-null.
906 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
908 if ( result === null || pos !== input.length ) {
909 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
917 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
919 mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
920 this.language = language;
922 $.each( magic, function ( key, val ) {
923 jmsg[ key.toLowerCase() ] = function () {
929 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
930 * Walk entire node structure, applying replacements and template functions when appropriate
932 * @param {Mixed} node Abstract syntax tree (top node or subnode)
933 * @param {Array} replacements for $1, $2, ... $n
934 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
936 this.emit = function ( node, replacements ) {
937 var ret, subnodes, operation,
939 switch ( typeof node ) {
944 // typeof returns object for arrays
946 // node is an array of nodes
947 subnodes = $.map( node.slice( 1 ), function ( n ) {
948 return jmsg.emit( n, replacements );
950 operation = node[ 0 ].toLowerCase();
951 if ( typeof jmsg[ operation ] === 'function' ) {
952 ret = jmsg[ operation ]( subnodes, replacements );
954 throw new Error( 'Unknown operation "' + operation + '"' );
958 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
959 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
960 // The logical thing is probably to return the empty string here when we encounter undefined.
964 throw new Error( 'Unexpected type in AST: ' + typeof node );
970 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
971 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
972 // If you have 'magic words' then configure the parser to have them upon creation.
974 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
975 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
976 mw.jqueryMsg.htmlEmitter.prototype = {
978 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
979 * Must return a single node to parents -- a jQuery with synthetic span
980 * However, unwrap any other synthetic spans in our children and pass them upwards
982 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
985 concat: function ( nodes ) {
986 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
987 $.each( nodes, function ( i, node ) {
988 if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
989 $.each( node.contents(), function ( j, childNode ) {
990 appendWithoutParsing( $span, childNode );
993 // Let jQuery append nodes, arrays of nodes and jQuery objects
994 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
995 appendWithoutParsing( $span, node );
1002 * Return escaped replacement of correct index, or string if unavailable.
1003 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1004 * if the specified parameter is not found return the same string
1005 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1007 * TODO: Throw error if nodes.length > 1 ?
1009 * @param {Array} nodes List of one element, integer, n >= 0
1010 * @param {Array} replacements List of at least n strings
1011 * @return {String} replacement
1013 replace: function ( nodes, replacements ) {
1014 var index = parseInt( nodes[ 0 ], 10 );
1016 if ( index < replacements.length ) {
1017 return replacements[ index ];
1019 // index not found, fallback to displaying variable
1020 return '$' + ( index + 1 );
1025 * Transform wiki-link
1028 * It only handles basic cases, either no pipe, or a pipe with an explicit
1031 * It does not attempt to handle features like the pipe trick.
1032 * However, the pipe trick should usually not be present in wikitext retrieved
1033 * from the server, since the replacement is done at save time.
1034 * It may, though, if the wikitext appears in extension-controlled content.
1036 * @param {String[]} nodes
1038 wikilink: function ( nodes ) {
1039 var page, anchor, url;
1042 url = mw.util.getUrl( page );
1044 if ( nodes.length === 1 ) {
1045 // [[Some Page]] or [[Namespace:Some Page]]
1048 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1049 anchor = nodes[ 1 ];
1052 return $( '<a>' ).attr( {
1059 * Converts array of HTML element key value pairs to object
1061 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1062 * name and 2 * n + 1 the associated value
1063 * @return {Object} Object mapping attribute name to attribute value
1065 htmlattributes: function ( nodes ) {
1066 var i, len, mapping = {};
1067 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1068 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1074 * Handles an (already-validated) HTML element.
1076 * @param {Array} nodes Nodes to process when creating element
1077 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1079 htmlelement: function ( nodes ) {
1080 var tagName, attributes, contents, $element;
1082 tagName = nodes.shift();
1083 attributes = nodes.shift();
1085 $element = $( document.createElement( tagName ) ).attr( attributes );
1086 return appendWithoutParsing( $element, contents );
1090 * Transform parsed structure into external link
1091 * If the href is a jQuery object, treat it as "enclosing" the link text.
1093 * - ... function, treat it as the click handler.
1094 * - ... string, treat it as a URI.
1096 * TODO: throw an error if nodes.length > 2 ?
1098 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {String}
1101 extlink: function ( nodes ) {
1104 contents = nodes[ 1 ];
1105 if ( arg instanceof jQuery ) {
1109 if ( typeof arg === 'function' ) {
1110 $el.attr( 'href', '#' )
1111 .click( function ( e ) {
1116 $el.attr( 'href', arg.toString() );
1119 return appendWithoutParsing( $el, contents );
1123 * This is basically use a combination of replace + external link (link with parameter
1124 * as url), but we don't want to run the regular replace here-on: inserting a
1125 * url as href-attribute of a link will automatically escape it already, so
1126 * we don't want replace to (manually) escape it as well.
1128 * TODO: throw error if nodes.length > 1 ?
1130 * @param {Array} nodes List of one element, integer, n >= 0
1131 * @param {Array} replacements List of at least n strings
1132 * @return {string} replacement
1134 extlinkparam: function ( nodes, replacements ) {
1136 index = parseInt( nodes[ 0 ], 10 );
1137 if ( index < replacements.length ) {
1138 replacement = replacements[ index ];
1140 replacement = '$' + ( index + 1 );
1142 return this.extlink( [ replacement, nodes[ 1 ] ] );
1146 * Transform parsed structure into pluralization
1147 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1148 * So convert it back with the current language's convertNumber.
1150 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1151 * @return {string} selected pluralized form according to current language
// Picks a plural form: nodes[ 0 ] is the count, the remaining nodes are the candidate forms.
// Forms written as "<digits>=<text>" are explicit forms for that exact number; they are
// separated out and handed to convertPlural as a map keyed by the number.
// NOTE(review): closing braces and the $.map callback body are not visible in this listing.
1153 plural: function ( nodes ) {
1154 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1155 explicitPluralForms = {};
// convertNumber is called with `true` as its second argument, then parseFloat turns the
// result into a JS number (NaN if it cannot be parsed).
1157 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1158 forms = nodes.slice( 1 );
1159 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1160 form = forms[ formIndex ];
// Case 1: the form is a jQuery wrapper produced by the HTML emitter (class 'mediaWiki_htmlEmitter').
1162 if ( form.jquery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1163 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1164 firstChild = form.contents().get( 0 );
// Only a leading text node can carry the "<digits>=" prefix.
1165 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1166 firstChildText = firstChild.textContent;
1167 if ( /^\d+=/.test( firstChildText ) ) {
1168 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1169 // Use the digit part as key and rest of first text node and
1170 // rest of child nodes as value.
// The prefix is stripped in place from the text node; the wrapper itself is stored as the value.
1171 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1172 explicitPluralForms[ explicitPluralFormNumber ] = form;
// Mark the slot so it can be dropped from the regular forms afterwards.
1173 forms[ formIndex ] = undefined;
// Case 2: the form is tested directly against the "<digits>=" pattern (plain string form).
1176 } else if ( /^\d+=/.test( form ) ) {
1177 // Simple explicit plural forms like 12=a dozen
1178 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
// Everything after the first '=' is the text for this explicit form.
1179 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1180 forms[ formIndex ] = undefined;
1184 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
// jQuery.map omits entries for which the callback returns null or undefined.
1185 forms = $.map( forms, function ( form ) {
// The language object makes the final choice from the count, the regular forms and the explicit forms.
1189 return this.language.convertPlural( count, forms, explicitPluralForms );
1193 * Transform parsed structure according to gender.
1195 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1197 * The first node must be one of:
1198 * - the mw.user object (or a compatible one)
1199 * - an empty string - indicating the current user, same effect as passing the mw.user object
1200 * - a gender string ('male', 'female' or 'unknown')
1202 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1203 * @return {string} Selected gender form according to current language
// Picks a gender form: nodes[ 0 ] identifies the user or gender, the remaining nodes are the forms.
// NOTE(review): the declaration of `gender` and the fallback branch that uses the value
// as a plain gender string are not visible in this listing.
1205 gender: function ( nodes ) {
1207 maybeUser = nodes[ 0 ],
1208 forms = nodes.slice( 1 );
// An empty first parameter stands for the current user.
1210 if ( maybeUser === '' ) {
1211 maybeUser = mw.user;
1214 // If we are passed a mw.user-like object, check their gender.
1215 // Otherwise, assume the gender string itself was passed.
// "mw.user-like" is detected by an `options` property that is an mw.Map.
1216 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1217 gender = maybeUser.options.get( 'gender' );
// The language object selects the form matching the resolved gender.
1222 return this.language.gender( gender, forms );
1226 * Transform parsed structure into grammar conversion.
1227 * Invoked by putting `{{grammar:form|word}}` in a message
1229 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1230 * @return {string} selected grammatical form according to current language
// Applies a grammatical transformation: nodes[ 0 ] is the grammar form.
// NOTE(review): the declaration of `word` is not visible in this listing; per the doc
// comment it is presumably nodes[ 1 ].
1232 grammar: function ( nodes ) {
1233 var form = nodes[ 0 ],
// Short-circuits: if `word` or `form` is falsy, that falsy value is returned as-is and
// convertGrammar is never called.
1235 return word && form && this.language.convertGrammar( word, form );
1239 * Transform parsed structure into an int: (interface language) message include
1240 * Invoked by putting `{{int:othermessage}}` into a message
1242 * @param {Array} nodes List of nodes
1243 * @return {string} Other message
// Includes another interface message; nodes[ 0 ] is that message's key.
1245 'int': function ( nodes ) {
1246 var msg = nodes[ 0 ];
// The first character of the key is lower-cased before lookup. A fresh message function
// is obtained from mw.jqueryMsg.getMessageFunction() (called without options) on every call.
1247 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1251 * Takes an unformatted number (Arabic digits, no group separators, and . as decimal separator)
1252 * and outputs it in the localized digit script and formatted with decimal
1253 * separator, according to the current language.
1255 * @param {Array} nodes List of nodes
1256 * @return {number|string} Formatted number
// Formats nodes[ 0 ] via the language's convertNumber. An optional second node equal to
// 'R' switches the boolean flag passed as convertNumber's second argument to true.
1258 formatnum: function ( nodes ) {
// `isInteger` is true only when nodes[ 1 ] is exactly the string 'R'.
1259 var isInteger = ( nodes[ 1 ] && nodes[ 1 ] === 'R' ) ? true : false,
1260 number = nodes[ 0 ];
1262 return this.language.convertNumber( number, isInteger );
1266 // Deprecated! don't rely on gM existing.
1267 // The window.gM ought not to be required - or if required, not required here.
1268 // But moving it to extensions breaks it (?!)
1269 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
1270 // @deprecated since 1.23
// Registers window.gM through mw.log.deprecate, with a message function created using
// default options and a hint pointing callers to mw.message( ... ).parse().
1271 mw.log.deprecate( window, 'gM', mw.jqueryMsg.getMessageFunction(), 'Use mw.message( ... ).parse() instead.' );
1276 * @see mw.jqueryMsg#getPlugin
// Installs the plugin returned by mw.jqueryMsg.getPlugin() as the jQuery method `.msg()`.
1278 $.fn.msg = mw.jqueryMsg.getPlugin();
1280 // Replace the default message parser with jqueryMsg
1281 oldParser = mw.Message.prototype.parser;
1282 mw.Message.prototype.parser = function () {
1283 var messageFunction;
1285 // TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
1286 // Caching is somewhat problematic, because we do need different message functions for different maps, so
1287 // we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
1288 // Do not use mw.jqueryMsg unless required
1289 if ( this.format === 'plain' || !/\{\{|[\[<>]/.test( this.map.get( this.key ) ) ) {
1290 // Fall back to mw.msg's simple parser
1291 return oldParser.apply( this );
1294 messageFunction = mw.jqueryMsg.getMessageFunction( {
1296 // For format 'escaped', escaping part is handled by mediawiki.js
1299 return messageFunction( this.key, this.parameters );
1302 }( mediaWiki, jQuery ) );