2 * Experimental advanced wikitext parser-emitter.
3 * See: http://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
9 slice = Array.prototype.slice,
12 'SITENAME' : mw.config.get( 'wgSiteName' )
14 messages : mw.messages,
15 language : mw.language,
17 // Same meaning as in mediawiki.js.
19 // Only 'text', 'parse', and 'escaped' are supported, and the
20 // actual escaping for 'escaped' is done by other code (generally
21 // through jqueryMsg).
23 // However, note that this default only
24 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
25 // is 'text', including when it uses jqueryMsg.
31 * Given parser options, return a function that parses a key and replacements, returning jQuery object
32 * @param {Object} parser options
33 * @return {Function} accepting ( String message key, String replacement1, String replacement2 ... ) and returning {jQuery}
35 function getFailableParserFn( options ) {
36 var parser = new mw.jqueryMsg.parser( options );
38 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
39 * If there was an error parsing, return the key and the error message (wrapped in jQuery). This should put the error right into
40 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
42 * @param {Array} first element is the key, replacements may be in array in 2nd element, or remaining elements.
45 return function ( args ) {
47 argsArray = $.isArray( args[1] ) ? args[1] : slice.call( args, 1 );
49 return parser.parse( key, argsArray );
51 return $( '<span>' ).append( key + ': ' + e.message );
60 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
62 * window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
63 * $( 'p#headline' ).html( gM( 'hello-user', username ) );
65 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
66 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
68 * @param {Object} parser options
69 * @return {Function} function suitable for assigning to window.gM
71 mw.jqueryMsg.getMessageFunction = function ( options ) {
72 var failableParserFn = getFailableParserFn( options ),
75 if ( options && options.format !== undefined ) {
76 format = options.format;
78 format = parserDefaults.format;
82 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
83 * somefunction(a, b, c, d)
85 * somefunction(a, [b, c, d])
87 * @param {string} key Message key.
88 * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
89 * @return {string} Rendered HTML.
92 var failableResult = failableParserFn( arguments );
93 if ( format === 'text' || format === 'escaped' ) {
94 return failableResult.text();
96 return failableResult.html();
103 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
104 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
106 * $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
107 * var userlink = $( '<a>' ).click( function () { alert( "hello!!") } );
108 * $( 'p#headline' ).msg( 'hello-user', userlink );
110 * @param {Object} parser options
111 * @return {Function} function suitable for assigning to jQuery plugin, such as $.fn.msg
113 mw.jqueryMsg.getPlugin = function ( options ) {
114 var failableParserFn = getFailableParserFn( options );
116 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
117 * somefunction(a, b, c, d)
119 * somefunction(a, [b, c, d])
121 * We append to 'this', which in a jQuery plugin context will be the selected elements.
122 * @param {string} key Message key.
123 * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
124 * @return {jQuery} this
127 var $target = this.empty();
128 // TODO: Simply $target.append( failableParserFn( arguments ).contents() )
129 // or Simply $target.append( failableParserFn( arguments ) )
130 $.each( failableParserFn( arguments ).contents(), function ( i, node ) {
131 $target.append( node );
139 * Describes an object, whose primary duty is to .parse() message keys.
140 * @param {Object} options
142 mw.jqueryMsg.parser = function ( options ) {
143 this.settings = $.extend( {}, parserDefaults, options );
144 this.settings.onlyCurlyBraceTransform = ( this.settings.format === 'text' || this.settings.format === 'escaped' );
146 this.emitter = new mw.jqueryMsg.htmlEmitter( this.settings.language, this.settings.magic );
149 mw.jqueryMsg.parser.prototype = {
151 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
153 * In most cases, the message is a string so this is identical.
154 * (This is why we would like to move this functionality server-side).
156 * The two parts of the key are separated by colon. For example:
158 * "message-key:true": ast
160 * if the key is "message-key" and onlyCurlyBraceTransform is true.
162 * This cache is shared by all instances of mw.jqueryMsg.parser.
169 * Where the magic happens.
170 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
171 * If an error is thrown, returns original key, and logs the error
172 * @param {String} key Message key.
173 * @param {Array} replacements Variable replacements for $1, $2... $n
176 parse: function ( key, replacements ) {
177 return this.emitter.emit( this.getAst( key ), replacements );
180 * Fetch the message string associated with a key, return parsed structure. Memoized.
181 * Note that we pass '[' + key + ']' back for a missing message here.
182 * @param {String} key
183 * @return {String|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
185 getAst: function ( key ) {
186 var cacheKey = [key, this.settings.onlyCurlyBraceTransform].join( ':' ), wikiText;
188 if ( this.astCache[ cacheKey ] === undefined ) {
189 wikiText = this.settings.messages.get( key );
190 if ( typeof wikiText !== 'string' ) {
191 wikiText = '\\[' + key + '\\]';
193 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
195 return this.astCache[ cacheKey ];
199 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
201 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
202 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
204 * @param {String} message string wikitext
206 * @return {Mixed} abstract syntax tree
208 wikiTextToAst: function ( input ) {
210 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
211 backslash, anyCharacter, escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
212 whitespace, dollar, digits,
213 openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openLink, closeLink, templateName, pipe, colon,
214 templateContents, openTemplate, closeTemplate,
215 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result;
217 // Indicates current position in input as we parse through it.
218 // Shared among all parsing functions below.
221 // =========================================================
222 // parsing combinators - could be a library on its own
223 // =========================================================
224 // Try parsers until one works, if none work return null
225 function choice( ps ) {
228 for ( i = 0; i < ps.length; i++ ) {
230 if ( result !== null ) {
237 // try several ps in a row, all must succeed or return null
238 // this is the only eager one
239 function sequence( ps ) {
243 for ( i = 0; i < ps.length; i++ ) {
245 if ( res === null ) {
253 // run the same parser over and over until it fails.
254 // must succeed a minimum of n times or return null
255 function nOrMore( n, p ) {
257 var originalPos = pos,
260 while ( parsed !== null ) {
261 result.push( parsed );
264 if ( result.length < n ) {
271 // There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
272 // But using this as a combinator seems to cause problems when combined with nOrMore().
273 // May be some scoping issue
274 function transform( p, fn ) {
277 return result === null ? null : fn( result );
280 // Helpers -- just make ps out of simpler JS builtin types
281 function makeStringParser( s ) {
285 if ( input.substr( pos, len ) === s ) {
292 function makeRegexParser( regex ) {
294 var matches = input.substr( pos ).match( regex );
295 if ( matches === null ) {
298 pos += matches[0].length;
304 * ===================================================================
305 * General patterns above this line -- wikitext specific parsers below
306 * ===================================================================
308 // Parsing functions follow. All parsing functions work like this:
309 // They don't accept any arguments.
310 // Instead, they just operate non destructively on the string 'input'
311 // As they can consume parts of the string, they advance the shared variable pos,
312 // and return tokens (or whatever else they want to return).
313 // some things are defined as closures and other things as ordinary functions
314 // converting everything to a closure makes it a lot harder to debug... errors pop up
315 // but some debuggers can't tell you exactly where they come from. Also the mutually
316 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
317 // This may be because, to save code, memoization was removed
318 regularLiteral = makeRegexParser( /^[^{}\[\]$\\]/ );
319 regularLiteralWithoutBar = makeRegexParser(/^[^{}\[\]$\\|]/);
320 regularLiteralWithoutSpace = makeRegexParser(/^[^{}\[\]$\s]/);
321 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
322 backslash = makeStringParser( '\\' );
323 anyCharacter = makeRegexParser( /^./ );
324 function escapedLiteral() {
325 var result = sequence( [
329 return result === null ? null : result[1];
331 escapedOrLiteralWithoutSpace = choice( [
333 regularLiteralWithoutSpace
335 escapedOrLiteralWithoutBar = choice( [
337 regularLiteralWithoutBar
339 escapedOrRegularLiteral = choice( [
343 // Used to define "literals" without spaces, in space-delimited situations
344 function literalWithoutSpace() {
345 var result = nOrMore( 1, escapedOrLiteralWithoutSpace )();
346 return result === null ? null : result.join('');
348 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
349 // it is not a literal in the parameter
350 function literalWithoutBar() {
351 var result = nOrMore( 1, escapedOrLiteralWithoutBar )();
352 return result === null ? null : result.join('');
355 // Used for wikilink page names. Like literalWithoutBar, but
356 // without allowing escapes.
357 function unescapedLiteralWithoutBar() {
358 var result = nOrMore( 1, regularLiteralWithoutBar )();
359 return result === null ? null : result.join('');
363 var result = nOrMore( 1, escapedOrRegularLiteral )();
364 return result === null ? null : result.join('');
367 function curlyBraceTransformExpressionLiteral() {
368 var result = nOrMore( 1, regularLiteralWithSquareBrackets )();
369 return result === null ? null : result.join('');
372 whitespace = makeRegexParser( /^\s+/ );
373 dollar = makeStringParser( '$' );
374 digits = makeRegexParser( /^\d+/ );
376 function replacement() {
377 var result = sequence( [
381 if ( result === null ) {
384 return [ 'REPLACE', parseInt( result[1], 10 ) - 1 ];
386 openExtlink = makeStringParser( '[' );
387 closeExtlink = makeStringParser( ']' );
388 // this extlink MUST have inner text, e.g. [foo] not allowed; [foo bar] is allowed
390 var result, parsedResult;
392 parsedResult = sequence( [
394 nonWhitespaceExpression,
399 if ( parsedResult !== null ) {
400 result = [ 'LINK', parsedResult[1], parsedResult[3] ];
404 // this is the same as the above extlink, except that the url is being passed on as a parameter
405 function extLinkParam() {
406 var result = sequence( [
414 if ( result === null ) {
417 return [ 'LINKPARAM', parseInt( result[2], 10 ) - 1, result[4] ];
419 openLink = makeStringParser( '[[' );
420 closeLink = makeStringParser( ']]' );
421 pipe = makeStringParser( '|' );
423 function template() {
424 var result = sequence( [
429 return result === null ? null : result[1];
432 wikilinkPage = choice( [
433 unescapedLiteralWithoutBar,
437 function pipedWikilink() {
438 var result = sequence( [
443 return result === null ? null : [ result[0], result[2] ];
446 wikilinkContents = choice( [
448 wikilinkPage // unpiped link
452 var result, parsedResult, parsedLinkContents;
455 parsedResult = sequence( [
460 if ( parsedResult !== null ) {
461 parsedLinkContents = parsedResult[1];
462 result = [ 'WLINK' ].concat( parsedLinkContents );
466 templateName = transform(
467 // see $wgLegalTitleChars
468 // not allowing : due to the need to catch "PLURAL:$1"
469 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
470 function ( result ) { return result.toString(); }
472 function templateParam() {
476 nOrMore( 0, paramExpression )
478 if ( result === null ) {
482 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
483 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[0];
486 function templateWithReplacement() {
487 var result = sequence( [
492 return result === null ? null : [ result[0], result[2] ];
494 function templateWithOutReplacement() {
495 var result = sequence( [
500 return result === null ? null : [ result[0], result[2] ];
502 colon = makeStringParser(':');
503 templateContents = choice( [
505 var res = sequence( [
506 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
507 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}}
508 choice( [ templateWithReplacement, templateWithOutReplacement ] ),
509 nOrMore( 0, templateParam )
511 return res === null ? null : res[0].concat( res[1] );
514 var res = sequence( [
516 nOrMore( 0, templateParam )
518 if ( res === null ) {
521 return [ res[0] ].concat( res[1] );
524 openTemplate = makeStringParser('{{');
525 closeTemplate = makeStringParser('}}');
526 nonWhitespaceExpression = choice( [
534 paramExpression = choice( [
543 expression = choice( [
552 // Used when only {{-transformation is wanted, for 'text'
553 // or 'escaped' formats
554 curlyBraceTransformExpression = choice( [
557 curlyBraceTransformExpressionLiteral
564 * @param {Function} rootExpression root parse function
566 function start( rootExpression ) {
567 var result = nOrMore( 0, rootExpression )();
568 if ( result === null ) {
571 return [ 'CONCAT' ].concat( result );
573 // everything above this point is supposed to be stateless/static, but
574 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
575 // finally let's do some actual work...
577 // If you add another possible rootExpression, you must update the astCache key scheme.
578 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
581 * For success, the p must have gotten to the end of the input
582 * and returned a non-null.
583 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
585 if ( result === null || pos !== input.length ) {
586 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
593 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
595 mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
596 this.language = language;
598 $.each( magic, function ( key, val ) {
599 jmsg[ key.toLowerCase() ] = function () {
604 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
605 * Walk entire node structure, applying replacements and template functions when appropriate
606 * @param {Mixed} abstract syntax tree (top node or subnode)
607 * @param {Array} replacements for $1, $2, ... $n
608 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
610 this.emit = function ( node, replacements ) {
611 var ret, subnodes, operation,
613 switch ( typeof node ) {
618 // typeof returns object for arrays
620 // node is an array of nodes
621 subnodes = $.map( node.slice( 1 ), function ( n ) {
622 return jmsg.emit( n, replacements );
624 operation = node[0].toLowerCase();
625 if ( typeof jmsg[operation] === 'function' ) {
626 ret = jmsg[ operation ]( subnodes, replacements );
628 throw new Error( 'Unknown operation "' + operation + '"' );
632 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
633 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
634 // The logical thing is probably to return the empty string here when we encounter undefined.
638 throw new Error( 'Unexpected type in AST: ' + typeof node );
643 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
644 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
645 // If you have 'magic words' then configure the parser to have them upon creation.
647 // An emitter method takes the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
648 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
649 mw.jqueryMsg.htmlEmitter.prototype = {
651 * Parsing has been applied depth-first, so we can assume that all nodes here are single nodes
652 * Must return a single node to parents -- a jQuery with synthetic span
653 * However, unwrap any other synthetic spans in our children and pass them upwards
654 * @param {Array} nodes - mixed, some single nodes, some arrays of nodes
657 concat: function ( nodes ) {
658 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
659 $.each( nodes, function ( i, node ) {
660 if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
661 $.each( node.contents(), function ( j, childNode ) {
662 $span.append( childNode );
665 // Let jQuery append nodes, arrays of nodes and jQuery objects
666 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
667 $span.append( $.type( node ) === 'object' ? node : document.createTextNode( node ) );
674 * Return escaped replacement of correct index, or string if unavailable.
675 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
676 * if the specified parameter is not found return the same string
677 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
678 * TODO: Throw error if nodes.length > 1 ?
679 * @param {Array} of one element, integer, n >= 0
680 * @return {String} replacement
682 replace: function ( nodes, replacements ) {
683 var index = parseInt( nodes[0], 10 );
685 if ( index < replacements.length ) {
686 return replacements[index];
688 // index not found, fallback to displaying variable
689 return '$' + ( index + 1 );
694 * Transform wiki-link
697 * It only handles basic cases, either no pipe, or a pipe with an explicit
700 * It does not attempt to handle features like the pipe trick.
701 * However, the pipe trick should usually not be present in wikitext retrieved
702 * from the server, since the replacement is done at save time.
703 * It may, though, if the wikitext appears in extension-controlled content.
707 wlink: function ( nodes ) {
708 var page, anchor, url;
711 url = mw.util.wikiGetlink( page );
713 // [[Some Page]] or [[Namespace:Some Page]]
714 if ( nodes.length === 1 ) {
719 * [[Some Page|anchor text]] or
720 * [[Namespace:Some Page|anchor]]
726 return $( '<a />' ).attr( {
733 * Transform parsed structure into external link
734 * If the href is a jQuery object, treat it as "enclosing" the link text.
735 * ... function, treat it as the click handler
736 * ... string, treat it as a URI
737 * TODO: throw an error if nodes.length > 2 ?
738 * @param {Array} of two elements, {jQuery|Function|String} and {String}
741 link: function ( nodes ) {
745 if ( arg instanceof jQuery ) {
749 if ( typeof arg === 'function' ) {
750 $el.click( arg ).attr( 'href', '#' );
752 $el.attr( 'href', arg.toString() );
755 $el.append( contents );
760 * This is basically a combination of replace + link (link with parameter
761 * as url), but we don't want to run the regular replace here-on: inserting a
762 * url as href-attribute of a link will automatically escape it already, so
763 * we don't want replace to (manually) escape it as well.
764 * TODO throw error if nodes.length > 1 ?
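765 * @param {Array} of two elements: zero-based replacement index (integer, n >= 0) and the link contents
766 * @return {Mixed} whatever this.link() returns for the resolved replacement and the link contents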
768 linkparam: function ( nodes, replacements ) {
770 index = parseInt( nodes[0], 10 );
771 if ( index < replacements.length) {
772 replacement = replacements[index];
774 replacement = '$' + ( index + 1 );
776 return this.link( [ replacement, nodes[1] ] );
780 * Transform parsed structure into pluralization
781 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
782 * So convert it back with the current language's convertNumber.
783 * @param {Array} of nodes, [ {String|Number}, {String}, {String} ... ]
784 * @return {String} selected pluralized form according to current language
786 plural: function ( nodes ) {
788 count = parseFloat( this.language.convertNumber( nodes[0], true ) );
789 forms = nodes.slice(1);
790 return forms.length ? this.language.convertPlural( count, forms ) : '';
794 * Transform parsed structure according to gender.
795 * Usage {{gender:[ gender | mw.user object ] | masculine form|feminine form|neutral form}}.
796 * The first node is either a string, which can be "male" or "female",
797 * or a User object (not a username).
799 * @param {Array} of nodes, [ {String|mw.User}, {String}, {String}, {String} ]
800 * @return {String} selected gender form according to current language
802 gender: function ( nodes ) {
805 if ( nodes[0] && nodes[0].options instanceof mw.Map ) {
806 gender = nodes[0].options.get( 'gender' );
811 forms = nodes.slice( 1 );
813 return this.language.gender( gender, forms );
817 * Transform parsed structure into grammar conversion.
818 * Invoked by putting {{grammar:form|word}} in a message
819 * @param {Array} of nodes [{Grammar case eg: genitive}, {String word}]
820 * @return {String} selected grammatical form according to current language
822 grammar: function ( nodes ) {
825 return word && form && this.language.convertGrammar( word, form );
829 * Transform parsed structure into an int: (interface language) message include
830 * Invoked by putting {{int:othermessage}} into a message
831 * @param {Array} of nodes
832 * @return {string} Other message
834 int: function ( nodes ) {
835 return mw.jqueryMsg.getMessageFunction()( nodes[0].toLowerCase() );
839 * Takes an unformatted number (Arabic numerals, no group separators, and . as the decimal separator)
840 * and outputs it in the localized digit script and formatted with decimal
841 * separator, according to the current language
842 * @param {Array} of nodes
843 * @return {Number|String} formatted number
845 formatnum: function ( nodes ) {
846 var isInteger = ( nodes[1] && nodes[1] === 'R' ) ? true : false,
849 return this.language.convertNumber( number, isInteger );
852 // Deprecated! don't rely on gM existing.
853 // The window.gM ought not to be required - or if required, not required here.
854 // But moving it to extensions breaks it (?!)
855 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
856 window.gM = mw.jqueryMsg.getMessageFunction();
857 $.fn.msg = mw.jqueryMsg.getPlugin();
859 // Replace the default message parser with jqueryMsg
860 oldParser = mw.Message.prototype.parser;
861 mw.Message.prototype.parser = function () {
864 // TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
865 // Caching is somewhat problematic, because we do need different message functions for different maps, so
866 // we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
867 // Do not use mw.jqueryMsg unless required
868 if ( this.format === 'plain' || !/\{\{|\[/.test(this.map.get( this.key ) ) ) {
869 // Fall back to mw.msg's simple parser
870 return oldParser.apply( this );
873 messageFunction = mw.jqueryMsg.getMessageFunction( {
874 'messages': this.map,
875 // For format 'escaped', escaping part is handled by mediawiki.js
876 'format': this.format
878 return messageFunction( this.key, this.parameters );
881 }( mediaWiki, jQuery ) );