Non-word characters don't terminate tag names.
[mediawiki.git] / resources / mediawiki / mediawiki.jqueryMsg.js
blob5539d4dbc4dd266f7031666a9f4d44afabea0f9e
1 /**
2 * Experimental advanced wikitext parser-emitter.
3 * See: http://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
var oldParser;

// Shorthand used to copy array-likes (such as `arguments`) into real arrays.
var slice = Array.prototype.slice;

// Default parser settings; the mw.jqueryMsg.parser constructor merges caller options over these.
var parserDefaults = {
        // Values substituted for magic words.
        magic : {
                'SITENAME' : mw.config.get( 'wgSiteName' )
        },

        // Whitelist of HTML element names. Based on, but simpler than, Sanitizer.php.
        // Self-closing tags are not currently supported.
        allowedHtmlElements : [
                'b',
                'i'
        ],

        // Flat list of attribute names, not keyed by tag.
        // See Sanitizer::setupAttributeWhitelist
        allowedHtmlCommonAttributes : [
                // HTML
                'id',
                'class',
                'style',
                'lang',
                'dir',
                'title',

                // WAI-ARIA
                'role'
        ],

        // Per-element attribute whitelist.
        // Key: element name in lower case.
        // Value: array of attribute names allowed for that element.
        allowedHtmlAttributesByElement : {},

        // Message store and language helper used by the parser and emitter.
        messages : mw.messages,
        language : mw.language,

        // Output format; same meaning as in mediawiki.js.
        //
        // Only 'text', 'parse', and 'escaped' are supported, and the actual
        // escaping for 'escaped' is done by other code (generally through
        // jqueryMsg).
        //
        // This default only applies to direct calls to jqueryMsg. The default
        // for mediawiki.js itself is 'text', including when it uses jqueryMsg.
        format: 'parse'
};
/**
 * Wrapper around jQuery append that converts every non-object child to a TextNode, so that
 * append never interprets a string as an htmlString and builds elements from it.
 *
 * Children whose typeof is 'object' (jQuery, HTMLElement, TextNode, etc.) are passed through
 * unchanged.
 *
 * The children array supplied by the caller is never modified: the nodes to append are
 * collected in a new array, so a caller can safely reuse its array afterwards.
 *
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
        var i, len, child,
                source = $.isArray( children ) ? children : [ children ],
                nodes = [];

        for ( i = 0, len = source.length; i < len; i++ ) {
                child = source[i];
                // Strings, numbers, booleans and undefined become literal text.
                nodes.push( typeof child === 'object' ? child : document.createTextNode( child ) );
        }

        return $parent.append( nodes );
}
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape:
 * &#039; &quot; &lt; &gt; and &amp;.
 *
 * Each entity in the input is decoded exactly once; text produced by decoding is not
 * scanned again (so "&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
        var characterByEntity = {
                '&#039;': '\'',
                '&quot;': '"',
                '&lt;': '<',
                '&gt;': '>',
                '&amp;': '&'
        };

        return encoded.replace( /&(?:#039|quot|lt|gt|amp);/g, function ( entity ) {
                return characterByEntity[entity];
        } );
}
/**
 * Builds a parser from the given options and returns a function that parses a message key
 * with replacements, without letting parse errors escape.
 *
 * @param {Object} options Parser options, passed to the mw.jqueryMsg.parser constructor
 * @return {Function} Function accepting one array-like argument (see below) and returning {jQuery}
 */
function getFailableParserFn( options ) {
        var parser = new mw.jqueryMsg.parser( options );

        /**
         * Try to parse a key and optional replacements; the result may be a tree of jQuery nodes.
         * When parsing throws, the result is instead a <span> whose text is the key followed by
         * the error message. That puts the problem straight into the interface without halting
         * script execution, and should make it clearer what needs fixing.
         *
         * @param {Array} args First element is the key; replacements are either an array in the
         *  second element, or all the remaining elements.
         * @return {jQuery}
         */
        return function ( args ) {
                var replacements,
                        key = args[0];

                if ( $.isArray( args[1] ) ) {
                        replacements = args[1];
                } else {
                        replacements = slice.call( args, 1 );
                }

                try {
                        return parser.parse( key, replacements );
                } catch ( err ) {
                        return $( '<span>' ).text( key + ': ' + err.message );
                }
        };
}
mw.jqueryMsg = {};

/**
 * Class method.
 * Returns a function suitable for use as a global, which builds a string from a message key
 * (and optional replacements).
 * e.g.
 *       window.gM = mw.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you
 * want to preserve bindings use the jQuery plugin version instead. This is only included for
 * backwards compatibility with gM().
 *
 * @param {Object} options Parser options; options.format, when defined, overrides the default format
 * @return {Function} function suitable for assigning to window.gM
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
        var failableParserFn = getFailableParserFn( options ),
                hasOwnFormat = options && options.format !== undefined,
                format = hasOwnFormat ? options.format : parserDefaults.format,
                // 'text' and 'escaped' read the result back as text; anything else as HTML.
                wantsText = ( format === 'text' || format === 'escaped' );

        /**
         * N.B. replacements are variadic arguments or an array in second parameter. In other words:
         *    somefunction(a, b, c, d)
         * is equivalent to
         *    somefunction(a, [b, c, d])
         *
         * @param {string} key Message key.
         * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
         * @return {string} Rendered text (formats 'text' and 'escaped') or rendered HTML (otherwise).
         */
        return function () {
                var $result = failableParserFn( arguments );
                return wantsText ? $result.text() : $result.html();
        };
};
/**
 * Class method.
 * Returns a jQuery plugin which parses the message for a key, optionally doing replacements,
 * and puts the resulting nodes into the current selection (which is emptied first).
 * Bindings to passed-in jquery elements are preserved. Functions become click handlers for
 * [$1 linktext] links.
 * e.g.
 *        $.fn.msg = mw.jqueryMsg.getPlugin( options );
 *        var userlink = $( '<a>' ).click( function () { alert( "hello!!") } );
 *        $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * @param {Object} options Parser options
 * @return {Function} function suitable for assigning to jQuery plugin, such as $.fn.msg
 */
mw.jqueryMsg.getPlugin = function ( options ) {
        var failableParserFn = getFailableParserFn( options );

        /**
         * N.B. replacements are variadic arguments or an array in second parameter. In other words:
         *    somefunction(a, b, c, d)
         * is equivalent to
         *    somefunction(a, [b, c, d])
         *
         * Works on 'this', which in a jQuery plugin context will be the selected elements.
         * @param {string} key Message key.
         * @param {Array|mixed} replacements Optional variable replacements (variadically or an array).
         * @return {jQuery} this
         */
        return function () {
                var $target = this.empty(),
                        $nodes = failableParserFn( arguments ).contents();

                // TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
                // or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
                $nodes.each( function ( i, node ) {
                        appendWithoutParsing( $target, node );
                } );

                return $target;
        };
};
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The instance keeps its effective settings (parserDefaults overlaid with options, plus the
 * derived onlyCurlyBraceTransform flag) and an htmlEmitter built from the language and magic
 * settings.
 *
 * @param {Object} options Overrides for parserDefaults
 */
mw.jqueryMsg.parser = function ( options ) {
        var settings = $.extend( {}, parserDefaults, options ),
                format = settings.format;

        // For the 'text' and 'escaped' formats only curly brace constructs are transformed.
        settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

        this.settings = settings;
        this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
216         mw.jqueryMsg.parser.prototype = {
217                 /**
218                  * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
219                  *
220                  * In most cases, the message is a string so this is identical.
221                  * (This is why we would like to move this functionality server-side).
222                  *
223                  * The two parts of the key are separated by colon.  For example:
224                  *
225                  * "message-key:true": ast
226                  *
227                  * if the key is "message-key" and onlyCurlyBraceTransform is true.
228                  *
229                  * This cache is shared by all instances of mw.jqueryMsg.parser.
230                  *
231                  * @static
232                  */
233                 astCache: {},
235                 /**
236                  * Where the magic happens.
237                  * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
238                  * If an error is thrown, returns original key, and logs the error
239                  * @param {String} key Message key.
240                  * @param {Array} replacements Variable replacements for $1, $2... $n
241                  * @return {jQuery}
242                  */
243                 parse: function ( key, replacements ) {
244                         return this.emitter.emit( this.getAst( key ), replacements );
245                 },
246                 /**
247                  * Fetch the message string associated with a key, return parsed structure. Memoized.
248                  * Note that we pass '[' + key + ']' back for a missing message here.
249                  * @param {String} key
250                  * @return {String|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
251                  */
252                 getAst: function ( key ) {
253                         var cacheKey = [key, this.settings.onlyCurlyBraceTransform].join( ':' ), wikiText;
255                         if ( this.astCache[ cacheKey ] === undefined ) {
256                                 wikiText = this.settings.messages.get( key );
257                                 if ( typeof wikiText !== 'string' ) {
258                                         wikiText = '\\[' + key + '\\]';
259                                 }
260                                 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
261                         }
262                         return this.astCache[ cacheKey ];
263                 },
265                 /**
266                  * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
267                  *
268                  * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
269                  * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
270                  *
271                  * @param {String} message string wikitext
272                  * @throws Error
273                  * @return {Mixed} abstract syntax tree
274                  */
275                 wikiTextToAst: function ( input ) {
276                         var pos, settings = this.settings, concat = Array.prototype.concat,
277                                 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
278                                 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
279                                 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
280                                 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
281                                 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
282                                 openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
283                                 templateContents, openTemplate, closeTemplate,
284                                 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result;
286                         // Indicates current position in input as we parse through it.
287                         // Shared among all parsing functions below.
288                         pos = 0;
290                         // =========================================================
291                         // parsing combinators - could be a library on its own
292                         // =========================================================
// Ordered alternative: the returned parser runs each parser of ps in turn and yields the
// first result that is not null. Yields null when every parser fails (or ps is empty).
function choice( ps ) {
        return function () {
                var outcome,
                        index = 0;

                while ( index < ps.length ) {
                        outcome = ps[index]();
                        if ( outcome !== null ) {
                                return outcome;
                        }
                        index += 1;
                }
                return null;
        };
}
// Run every parser of ps in order and collect their results in an array.
// If any of them returns null, rewind pos to where it was on entry and return null.
// Unlike the other combinators this one is eager: it runs immediately instead of
// returning a parser function.
function sequence( ps ) {
        var outcome,
                startPos = pos,
                collected = [],
                index = 0;

        while ( index < ps.length ) {
                outcome = ps[index]();
                if ( outcome === null ) {
                        pos = startPos;
                        return null;
                }
                collected.push( outcome );
                index += 1;
        }
        return collected;
}
// Repetition: the returned parser applies p again and again until p returns null, and
// yields the array of results. With fewer than n results it rewinds pos to where it
// started and yields null instead.
function nOrMore( n, p ) {
        return function () {
                var current,
                        startPos = pos,
                        matches = [];

                for ( current = p(); current !== null; current = p() ) {
                        matches.push( current );
                }

                if ( matches.length < n ) {
                        pos = startPos;
                        return null;
                }
                return matches;
        };
}
// Captures the common pattern "parse a thing; if that worked, convert the result,
// otherwise give back null": the returned parser runs p and passes a non-null result
// through fn.
// Author's caveat: using this as a combinator seems to cause problems when combined with
// nOrMore() -- possibly some scoping issue.
function transform( p, fn ) {
        return function () {
                var parsed = p();
                if ( parsed === null ) {
                        return null;
                }
                return fn( parsed );
        };
}
// Helpers -- build parsers out of simpler JS builtin types.
//
// Parser for a fixed string: when the input at pos starts with s, consume it and return s;
// otherwise return null and leave pos alone.
function makeStringParser( s ) {
        var len = s.length;

        return function () {
                if ( input.substr( pos, len ) !== s ) {
                        return null;
                }
                pos += len;
                return s;
        };
}
/**
 * Makes a regex parser, given a RegExp object.
 * The regex being passed in should start with a ^ to anchor it to the start
 * of the string.
 *
 * The returned parser matches the regex against the input from pos onwards. On success it
 * advances pos by the length of the match and returns the matched text; otherwise it
 * returns null and leaves pos unchanged.
 *
 * As a safeguard for a regex that is missing its anchor, a match that begins after the
 * current position counts as a failure. Accepting it would advance pos by the match
 * length only, leaving the parser position out of step with the text returned.
 *
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 */
function makeRegexParser( regex ) {
        return function () {
                var matches = input.substr( pos ).match( regex );

                // matches.index is the offset of the match within the remaining input.
                if ( matches === null || matches.index > 0 ) {
                        return null;
                }
                pos += matches[0].length;
                return matches[0];
        };
}
381                         /**
382                          *  ===================================================================
383                          *  General patterns above this line -- wikitext specific parsers below
384                          *  ===================================================================
385                          */
386                         // Parsing functions follow. All parsing functions work like this:
387                         // They don't accept any arguments.
388                         // Instead, they just operate non destructively on the string 'input'
389                         // As they can consume parts of the string, they advance the shared variable pos,
390                         // and return tokens (or whatever else they want to return).
391                         // some things are defined as closures and other things as ordinary functions
392                         // converting everything to a closure makes it a lot harder to debug... errors pop up
393                         // but some debuggers can't tell you exactly where they come from. Also the mutually
394                         // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
395                         // This may be because, to save code, memoization was removed
// Single-character literal parsers. Each character class lists the characters that are
// NOT accepted as a plain literal in that context.
// General text: stops at braces, square brackets, $, < and backslash.
regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
// As above but also stops at the pipe, and does not stop at <.
regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
// Stops at braces, square brackets, $ and any whitespace.
regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
// Square brackets are accepted; stops only at braces, $ and backslash.
regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );

// Individual characters.
backslash = makeStringParser( '\\' );
doubleQuote = makeStringParser( '"' );
singleQuote = makeStringParser( '\'' );
// One character of any kind that the regex dot matches.
anyCharacter = makeRegexParser( /^./ );

// Pieces of HTML tags.
openHtmlStartTag = makeStringParser( '<' );
openHtmlEndTag = makeStringParser( '</' );
// Always succeeds: matches '/' or the empty string.
optionalForwardSlash = makeRegexParser( /^\/?/ );
// '=' with optional whitespace on either side.
htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
// '>' with optional whitespace before it.
closeHtmlTag = makeRegexParser( /^\s*>/ );
// A backslash followed by one character; yields just that character (the backslash is
// dropped). Yields null when there is no such pair at the current position.
function escapedLiteral() {
        var pair = sequence( [ backslash, anyCharacter ] );

        if ( pair === null ) {
                return null;
        }
        return pair[1];
}
// One character of text in each context: an escaped character is tried first, then the
// context's plain literal character.
escapedOrLiteralWithoutSpace = choice( [ escapedLiteral, regularLiteralWithoutSpace ] );
escapedOrLiteralWithoutBar = choice( [ escapedLiteral, regularLiteralWithoutBar ] );
escapedOrRegularLiteral = choice( [ escapedLiteral, regularLiteral ] );
// "Literals" without spaces, for space-delimited situations: one or more escaped or
// non-space literal characters, joined into a single string. Null when there are none.
function literalWithoutSpace() {
        var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();

        if ( chars === null ) {
                return null;
        }
        return chars.join( '' );
}
// "Literals" within template parameters. The pipe character is the parameter delimiter,
// so by default it is not a literal in the parameter. One or more escaped or non-pipe
// literal characters, joined into a single string. Null when there are none.
function literalWithoutBar() {
        var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();

        if ( chars === null ) {
                return null;
        }
        return chars.join( '' );
}
// Used for wikilink page names. Like literalWithoutBar, but backslash escapes are not
// recognised: one or more plain non-pipe literal characters joined into a single string.
// Null when there are none.
function unescapedLiteralWithoutBar() {
        var chars = nOrMore( 1, regularLiteralWithoutBar )();

        if ( chars === null ) {
                return null;
        }
        return chars.join( '' );
}
// General text: one or more escaped or regular literal characters, joined into a single
// string. Null when there are none.
function literal() {
        var chars = nOrMore( 1, escapedOrRegularLiteral )();

        if ( chars === null ) {
                return null;
        }
        return chars.join( '' );
}
// Text in which square brackets count as ordinary characters: one or more
// regularLiteralWithSquareBrackets characters joined into a single string.
// Null when there are none.
function curlyBraceTransformExpressionLiteral() {
        var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();

        if ( chars === null ) {
                return null;
        }
        return chars.join( '' );
}
// A run of one or more ASCII letters starting exactly at the current position.
// The ^ anchor is required: without it the regex could match letters further along in the
// input while pos advanced only by the match length, so a non-letter in front of the name
// would be skipped over silently and the parser position would no longer be correct.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
// Attribute value bodies: everything up to (not including) the closing quote. May be empty.
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );

// One or more whitespace characters.
whitespace = makeRegexParser( /^\s+/ );
dollar = makeStringParser( '$' );
// One or more decimal digits.
digits = makeRegexParser( /^\d+/ );
// A "$n" placeholder. Yields [ 'REPLACE', n - 1 ], i.e. the zero-based replacement index,
// or null when the input at the current position is not a dollar sign followed by digits.
function replacement() {
        var parts = sequence( [ dollar, digits ] );

        return parts === null ? null : [ 'REPLACE', parseInt( parts[1], 10 ) - 1 ];
}
openExtlink = makeStringParser( '[' );
closeExtlink = makeStringParser( ']' );

// External link: '[' target whitespace contents ']'.
// The link MUST have inner contents, e.g. [foo] is not allowed; [foo bar],
// [foo <i>bar</i>], etc. are allowed.
// Yields [ 'EXTLINK', target, contents ], or null when the input does not match.
function extlink() {
        var target, contentNodes,
                parts = sequence( [
                        openExtlink,
                        nonWhitespaceExpression,
                        whitespace,
                        nOrMore( 1, expression ),
                        closeExtlink
                ] );

        if ( parts === null ) {
                return null;
        }

        target = parts[1];
        contentNodes = parts[3];

        // TODO (mattflaschen, 2013-03-22): Clean this up if possible.
        // A single content node is used as is rather than wrapped in CONCAT, so that it at
        // least doesn't get the htmlEmitter span.
        if ( contentNodes.length === 1 ) {
                return [ 'EXTLINK', target, contentNodes[0] ];
        }
        return [ 'EXTLINK', target, [ 'CONCAT' ].concat( contentNodes ) ];
}
// External link whose target is passed in as a parameter: '[' '$' digits whitespace
// expression ']'.
// Yields [ 'EXTLINKPARAM', n - 1, expression ] where n is the number after the dollar
// sign, or null when the input does not match.
function extLinkParam() {
        var parts = sequence( [
                openExtlink,
                dollar,
                digits,
                whitespace,
                expression,
                closeExtlink
        ] );

        return parts === null ?
                null :
                [ 'EXTLINKPARAM', parseInt( parts[2], 10 ) - 1, parts[4] ];
}
// Delimiters used by the wikilink parsers: '[[' ... ']]', and '|' between the parts.
pipe = makeStringParser( '|' );
openWikilink = makeStringParser( '[[' );
closeWikilink = makeStringParser( ']]' );
// Template: openTemplate, templateContents, closeTemplate. Yields whatever
// templateContents produced, or null when the sequence does not match.
function template() {
        var parts = sequence( [ openTemplate, templateContents, closeTemplate ] );

        if ( parts === null ) {
                return null;
        }
        return parts[1];
}
// The page part of a wikilink: plain unescaped text without a pipe, or else a template.
wikilinkPage = choice( [ unescapedLiteralWithoutBar, template ] );
// Piped wikilink contents: page '|' expression. Yields [ page, expression ], or null
// when the input does not match.
function pipedWikilink() {
        var parts = sequence( [ wikilinkPage, pipe, expression ] );

        if ( parts === null ) {
                return null;
        }
        return [ parts[0], parts[2] ];
}
// What may appear between '[[' and ']]': the piped form is tried first, then a bare page
// (unpiped link).
wikilinkContents = choice( [ pipedWikilink, wikilinkPage ] );
// Wikilink: '[[' contents ']]'. Yields an array starting with 'WIKILINK' followed by the
// parsed contents (concat spreads the [ page, expression ] pair of a piped link into two
// elements), or null when the input does not match.
function wikilink() {
        var parts = sequence( [ openWikilink, wikilinkContents, closeWikilink ] );

        if ( parts === null ) {
                return null;
        }
        return [ 'WIKILINK' ].concat( parts[1] );
}
567                         // TODO: Support data- if appropriate
568                         function doubleQuotedHtmlAttributeValue() {
569                                 var parsedResult = sequence( [
570                                         doubleQuote,
571                                         htmlDoubleQuoteAttributeValue,
572                                         doubleQuote
573                                 ] );
574                                 return parsedResult === null ? null : parsedResult[1];
575                         }
577                         function singleQuotedHtmlAttributeValue() {
578                                 var parsedResult = sequence( [
579                                         singleQuote,
580                                         htmlSingleQuoteAttributeValue,
581                                         singleQuote
582                                 ] );
583                                 return parsedResult === null ? null : parsedResult[1];
584                         }
586                         function htmlAttribute() {
587                                 var parsedResult = sequence( [
588                                         whitespace,
589                                         asciiAlphabetLiteral,
590                                         htmlAttributeEquals,
591                                         choice( [
592                                                 doubleQuotedHtmlAttributeValue,
593                                                 singleQuotedHtmlAttributeValue
594                                         ] )
595                                 ] );
596                                 return parsedResult === null ? null : [parsedResult[1], parsedResult[3]];
597                         }
599                         /**
600                          * Checks if HTML is allowed
601                          *
602                          * @param {string} startTagName HTML start tag name
603                          * @param {string} endTagName HTML start tag name
604                          * @param {Object} attributes array of consecutive key value pairs,
605                          *  with index 2 * n being a name and 2 * n + 1 the associated value
606                          * @return {boolean} true if this is HTML is allowed, false otherwise
607                          */
608                         function isAllowedHtml( startTagName, endTagName, attributes ) {
609                                 var i, len, attributeName;
611                                 startTagName = startTagName.toLowerCase();
612                                 endTagName = endTagName.toLowerCase();
613                                 if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
614                                         return false;
615                                 }
617                                 for ( i = 0, len = attributes.length; i < len; i += 2 ) {
618                                         attributeName = attributes[i];
619                                         if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
620                                              $.inArray( attributeName, settings.allowedHtmlAttributesByElement[startTagName] || [] ) === -1 ) {
621                                                 return false;
622                                         }
623                                 }
625                                 return true;
626                         }
628                         function htmlAttributes() {
629                                 var parsedResult = nOrMore( 0, htmlAttribute )();
630                                 // Un-nest attributes array due to structure of jQueryMsg operations (see emit).
631                                 return concat.apply( ['HTMLATTRIBUTES'], parsedResult );
632                         }
634                         // Subset of allowed HTML markup.
635                         // Most elements and many attributes allowed on the server are not supported yet.
636                         function html() {
637                                 var result = null, parsedOpenTagResult, parsedHtmlContents,
638                                         parsedCloseTagResult, wrappedAttributes, attributes,
639                                         startTagName, endTagName, startOpenTagPos, startCloseTagPos,
640                                         endOpenTagPos, endCloseTagPos;
642                                 // Break into three sequence calls.  That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
643                                 // 1. open through closeHtmlTag
644                                 // 2. expression
645                                 // 3. openHtmlEnd through close
646                                 // This will allow recording the positions to reconstruct if HTML is to be treated as text.
648                                 startOpenTagPos = pos;
649                                 parsedOpenTagResult = sequence( [
650                                         openHtmlStartTag,
651                                         asciiAlphabetLiteral,
652                                         htmlAttributes,
653                                         optionalForwardSlash,
654                                         closeHtmlTag
655                                 ] );
657                                 if ( parsedOpenTagResult === null ) {
658                                         return null;
659                                 }
661                                 endOpenTagPos = pos;
662                                 startTagName = parsedOpenTagResult[1];
664                                 parsedHtmlContents = nOrMore( 0, expression )();
666                                 startCloseTagPos = pos;
667                                 parsedCloseTagResult = sequence( [
668                                         openHtmlEndTag,
669                                         asciiAlphabetLiteral,
670                                         closeHtmlTag
671                                 ] );
673                                 if ( parsedCloseTagResult === null ) {
674                                         // Closing tag failed.  Return the start tag and contents.
675                                         return [ 'CONCAT', input.substring( startOpenTagPos, endOpenTagPos ) ].concat( parsedHtmlContents );
676                                 }
678                                 endCloseTagPos = pos;
679                                 endTagName = parsedCloseTagResult[1];
680                                 wrappedAttributes = parsedOpenTagResult[2];
681                                 attributes = wrappedAttributes.slice( 1 );
682                                 if ( isAllowedHtml( startTagName, endTagName, attributes) ) {
683                                         result = [ 'HTMLELEMENT', startTagName, wrappedAttributes ].concat( parsedHtmlContents );
684                                 } else {
685                                         // HTML is not allowed, so contents will remain how
686                                         // it was, while HTML markup at this level will be
687                                         // treated as text
688                                         // E.g. assuming script tags are not allowed:
689                                         //
690                                         // <script>[[Foo|bar]]</script>
691                                         //
692                                         // results in '&lt;script&gt;' and '&lt;/script&gt;'
693                                         // (not treated as an HTML tag), surrounding a fully
694                                         // parsed HTML link.
695                                         //
696                                         // Concatenate everything from the tag, flattening the contents.
697                                         result = [ 'CONCAT', input.substring( startOpenTagPos, endOpenTagPos ) ].concat( parsedHtmlContents, input.substring( startCloseTagPos, endCloseTagPos ) );
698                                 }
700                                 return result;
701                         }
703                         templateName = transform(
704                                 // see $wgLegalTitleChars
705                                 // not allowing : due to the need to catch "PLURAL:$1"
706                                 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
707                                 function ( result ) { return result.toString(); }
708                         );
709                         function templateParam() {
710                                 var expr, result;
711                                 result = sequence( [
712                                         pipe,
713                                         nOrMore( 0, paramExpression )
714                                 ] );
715                                 if ( result === null ) {
716                                         return null;
717                                 }
718                                 expr = result[1];
719                                 // use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
720                                 return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[0];
721                         }
723                         function templateWithReplacement() {
724                                 var result = sequence( [
725                                         templateName,
726                                         colon,
727                                         replacement
728                                 ] );
729                                 return result === null ? null : [ result[0], result[2] ];
730                         }
731                         function templateWithOutReplacement() {
732                                 var result = sequence( [
733                                         templateName,
734                                         colon,
735                                         paramExpression
736                                 ] );
737                                 return result === null ? null : [ result[0], result[2] ];
738                         }
739                         colon = makeStringParser(':');
740                         templateContents = choice( [
741                                 function () {
742                                         var res = sequence( [
743                                                 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
744                                                 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
745                                                 choice( [ templateWithReplacement, templateWithOutReplacement ] ),
746                                                 nOrMore( 0, templateParam )
747                                         ] );
748                                         return res === null ? null : res[0].concat( res[1] );
749                                 },
750                                 function () {
751                                         var res = sequence( [
752                                                 templateName,
753                                                 nOrMore( 0, templateParam )
754                                         ] );
755                                         if ( res === null ) {
756                                                 return null;
757                                         }
758                                         return [ res[0] ].concat( res[1] );
759                                 }
760                         ] );
761                         openTemplate = makeStringParser('{{');
762                         closeTemplate = makeStringParser('}}');
763                         nonWhitespaceExpression = choice( [
764                                 template,
765                                 wikilink,
766                                 extLinkParam,
767                                 extlink,
768                                 replacement,
769                                 literalWithoutSpace
770                         ] );
771                         paramExpression = choice( [
772                                 template,
773                                 wikilink,
774                                 extLinkParam,
775                                 extlink,
776                                 replacement,
777                                 literalWithoutBar
778                         ] );
780                         expression = choice( [
781                                 template,
782                                 wikilink,
783                                 extLinkParam,
784                                 extlink,
785                                 replacement,
786                                 html,
787                                 literal
788                         ] );
790                         // Used when only {{-transformation is wanted, for 'text'
791                         // or 'escaped' formats
792                         curlyBraceTransformExpression = choice( [
793                                 template,
794                                 replacement,
795                                 curlyBraceTransformExpressionLiteral
796                         ] );
799                         /**
800                          * Starts the parse
801                          *
802                          * @param {Function} rootExpression root parse function
803                          */
804                         function start( rootExpression ) {
805                                 var result = nOrMore( 0, rootExpression )();
806                                 if ( result === null ) {
807                                         return null;
808                                 }
809                                 return [ 'CONCAT' ].concat( result );
810                         }
811                         // everything above this point is supposed to be stateless/static, but
812                         // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
813                         // finally let's do some actual work...
815                         // If you add another possible rootExpression, you must update the astCache key scheme.
816                         result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
818                         /*
819                          * For success, the p must have gotten to the end of the input
820                          * and returned a non-null.
821                          * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
822                          */
823                         if ( result === null || pos !== input.length ) {
824                                 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
825                         }
826                         return result;
827                 }
829         };
830         /**
831          * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
832          */
833         mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
834                 this.language = language;
835                 var jmsg = this;
836                 $.each( magic, function ( key, val ) {
837                         jmsg[ key.toLowerCase() ] = function () {
838                                 return val;
839                         };
840                 } );
841                 /**
842                  * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
843                  * Walk entire node structure, applying replacements and template functions when appropriate
844                  * @param {Mixed} abstract syntax tree (top node or subnode)
845                  * @param {Array} replacements for $1, $2, ... $n
846                  * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
847                  */
848                 this.emit = function ( node, replacements ) {
849                         var ret, subnodes, operation,
850                                 jmsg = this;
851                         switch ( typeof node ) {
852                                 case 'string':
853                                 case 'number':
854                                         ret = node;
855                                         break;
856                                 // typeof returns object for arrays
857                                 case 'object':
858                                         // node is an array of nodes
859                                         subnodes = $.map( node.slice( 1 ), function ( n ) {
860                                                 return jmsg.emit( n, replacements );
861                                         } );
862                                         operation = node[0].toLowerCase();
863                                         if ( typeof jmsg[operation] === 'function' ) {
864                                                 ret = jmsg[ operation ]( subnodes, replacements );
865                                         } else {
866                                                 throw new Error( 'Unknown operation "' + operation + '"' );
867                                         }
868                                         break;
869                                 case 'undefined':
870                                         // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
871                                         // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
872                                         // The logical thing is probably to return the empty string here when we encounter undefined.
873                                         ret = '';
874                                         break;
875                                 default:
876                                         throw new Error( 'Unexpected type in AST: ' + typeof node );
877                         }
878                         return ret;
879                 };
880         };
881         // For everything in input that follows double-open-curly braces, there should be an equivalent parser
882         // function. For instance {{PLURAL ... }} will be processed by 'plural'.
883         // If you have 'magic words' then configure the parser to have them upon creation.
884         //
885         // An emitter method takes the array of (already emitted) subnodes and the array of replacements (the values that $1, $2... should translate to).
886         // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
887         mw.jqueryMsg.htmlEmitter.prototype = {
888                 /**
889                  * Parsing has been applied depth-first we can assume that all nodes here are single nodes
890                  * Must return a single node to parents -- a jQuery with synthetic span
891                  * However, unwrap any other synthetic spans in our children and pass them upwards
892                  * @param {Array} nodes - mixed, some single nodes, some arrays of nodes
893                  * @return {jQuery}
894                  */
895                 concat: function ( nodes ) {
896                         var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
897                         $.each( nodes, function ( i, node ) {
898                                 if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
899                                         $.each( node.contents(), function ( j, childNode ) {
900                                                 appendWithoutParsing( $span, childNode );
901                                         } );
902                                 } else {
903                                         // Let jQuery append nodes, arrays of nodes and jQuery objects
904                                         // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
905                                         appendWithoutParsing( $span, node );
906                                 }
907                         } );
908                         return $span;
909                 },
911                 /**
912                  * Return escaped replacement of correct index, or string if unavailable.
913                  * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
914                  * if the specified parameter is not found return the same string
915                  * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
916                  * TODO: Throw error if nodes.length > 1 ?
917                  * @param {Array} of one element, integer, n >= 0
918                  * @return {String} replacement
919                  */
920                 replace: function ( nodes, replacements ) {
921                         var index = parseInt( nodes[0], 10 );
923                         if ( index < replacements.length ) {
924                                 return replacements[index];
925                         } else {
926                                 // index not found, fallback to displaying variable
927                                 return '$' + ( index + 1 );
928                         }
929                 },
931                 /**
932                  * Transform wiki-link
933                  *
934                  * TODO:
935                  * It only handles basic cases, either no pipe, or a pipe with an explicit
936                  * anchor.
937                  *
938                  * It does not attempt to handle features like the pipe trick.
939                  * However, the pipe trick should usually not be present in wikitext retrieved
940                  * from the server, since the replacement is done at save time.
941                  * It may, though, if the wikitext appears in extension-controlled content.
942                  *
943                  * @param nodes
944                  */
945                 wikilink: function ( nodes ) {
946                         var page, anchor, url;
948                         page = nodes[0];
949                         url = mw.util.wikiGetlink( page );
951                         // [[Some Page]] or [[Namespace:Some Page]]
952                         if ( nodes.length === 1 ) {
953                                 anchor = page;
954                         }
956                         /*
957                          * [[Some Page|anchor text]] or
958                          * [[Namespace:Some Page|anchor]
959                          */
960                         else {
961                                 anchor = nodes[1];
962                         }
964                         return $( '<a />' ).attr( {
965                                 title: page,
966                                 href: url
967                         } ).text( anchor );
968                 },
970                 /**
971                  * Converts array of HTML element key value pairs to object
972                  *
973                  * @param {Array} nodes array of consecutive key value pairs, with index 2 * n being a name and 2 * n + 1 the associated value
974                  * @return {Object} object mapping attribute name to attribute value
975                  */
976                 htmlattributes: function ( nodes ) {
977                         var i, len, mapping = {};
978                         for ( i = 0, len = nodes.length; i < len; i += 2 ) {
979                                 mapping[nodes[i]] = decodePrimaryHtmlEntities( nodes[i + 1] );
980                         }
981                         return mapping;
982                 },
984                 /**
985                  * Handles an (already-validated) HTML element.
986                  *
987                  * @param {Array} nodes nodes to process when creating element
988                  * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
989                  */
990                 htmlelement: function ( nodes ) {
991                         var tagName, attributes, contents, $element;
993                         tagName = nodes.shift();
994                         attributes = nodes.shift();
995                         contents = nodes;
996                         $element = $( document.createElement( tagName ) ).attr( attributes );
997                         return appendWithoutParsing( $element, contents );
998                 },
1000                 /**
1001                  * Transform parsed structure into external link
1002                  * If the href is a jQuery object, treat it as "enclosing" the link text.
1003                  *              ... function, treat it as the click handler
1004                  *              ... string, treat it as a URI
1005                  * TODO: throw an error if nodes.length > 2 ?
1006                  * @param {Array} of two elements, {jQuery|Function|String} and {String}
1007                  * @return {jQuery}
1008                  */
1009                 extlink: function ( nodes ) {
1010                         var $el,
1011                                 arg = nodes[0],
1012                                 contents = nodes[1];
1013                         if ( arg instanceof jQuery ) {
1014                                 $el = arg;
1015                         } else {
1016                                 $el = $( '<a>' );
1017                                 if ( typeof arg === 'function' ) {
1018                                         $el.click( arg ).attr( 'href', '#' );
1019                                 } else {
1020                                         $el.attr( 'href', arg.toString() );
1021                                 }
1022                         }
1023                         return appendWithoutParsing( $el, contents );
1024                 },
1026                 /**
1027                  * This is basically use a combination of replace + external link (link with parameter
1028                  * as url), but we don't want to run the regular replace here-on: inserting a
1029                  * url as href-attribute of a link will automatically escape it already, so
1030                  * we don't want replace to (manually) escape it as well.
1031                  * TODO throw error if nodes.length > 1 ?
1032                  * @param {Array} of one element, integer, n >= 0
1033                  * @return {String} replacement
1034                  */
1035                 extlinkparam: function ( nodes, replacements ) {
1036                         var replacement,
1037                                 index = parseInt( nodes[0], 10 );
1038                         if ( index < replacements.length) {
1039                                 replacement = replacements[index];
1040                         } else {
1041                                 replacement = '$' + ( index + 1 );
1042                         }
1043                         return this.extlink( [ replacement, nodes[1] ] );
1044                 },
1046                 /**
1047                  * Transform parsed structure into pluralization
1048                  * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1049                  * So convert it back with the current language's convertNumber.
1050                  * @param {Array} of nodes, [ {String|Number}, {String}, {String} ... ]
1051                  * @return {String} selected pluralized form according to current language
1052                  */
1053                 plural: function ( nodes ) {
1054                         var forms, count;
1055                         count = parseFloat( this.language.convertNumber( nodes[0], true ) );
1056                         forms = nodes.slice(1);
1057                         return forms.length ? this.language.convertPlural( count, forms ) : '';
1058                 },
1060                 /**
1061                  * Transform parsed structure according to gender.
1062                  * Usage {{gender:[ gender | mw.user object ] | masculine form|feminine form|neutral form}}.
1063                  * The first node is either a string, which can be "male" or "female",
1064                  * or a User object (not a username).
1065                  *
1066                  * @param {Array} of nodes, [ {String|mw.User}, {String}, {String}, {String} ]
1067                  * @return {String} selected gender form according to current language
1068                  */
1069                 gender: function ( nodes ) {
1070                         var gender, forms;
1072                         if  ( nodes[0] && nodes[0].options instanceof mw.Map ) {
1073                                 gender = nodes[0].options.get( 'gender' );
1074                         } else {
1075                                 gender = nodes[0];
1076                         }
1078                         forms = nodes.slice( 1 );
1080                         return this.language.gender( gender, forms );
1081                 },
1083                 /**
1084                  * Transform parsed structure into grammar conversion.
1085                  * Invoked by putting {{grammar:form|word}} in a message
1086                  * @param {Array} of nodes [{Grammar case eg: genitive}, {String word}]
1087                  * @return {String} selected grammatical form according to current language
1088                  */
1089                 grammar: function ( nodes ) {
1090                         var form = nodes[0],
1091                                 word = nodes[1];
1092                         return word && form && this.language.convertGrammar( word, form );
1093                 },
1095                 /**
1096                  * Tranform parsed structure into a int: (interface language) message include
1097                  * Invoked by putting {{int:othermessage}} into a message
1098                  * @param {Array} of nodes
1099                  * @return {string} Other message
1100                  */
1101                 int: function ( nodes ) {
1102                         return mw.jqueryMsg.getMessageFunction()( nodes[0].toLowerCase() );
1103                 },
1105                 /**
1106                  * Takes an unformatted number (arab, no group separators and . as decimal separator)
1107                  * and outputs it in the localized digit script and formatted with decimal
1108                  * separator, according to the current language
1109                  * @param {Array} of nodes
1110                  * @return {Number|String} formatted number
1111                  */
1112                 formatnum: function ( nodes ) {
1113                         var isInteger = ( nodes[1] && nodes[1] === 'R' ) ? true : false,
1114                                 number = nodes[0];
1116                         return this.language.convertNumber( number, isInteger );
1117                 }
1118         };
// Deprecated: do not rely on the global gM being available.
// window.gM should not be needed at all, or at least should not be defined in this file,
// but moving it into extensions breaks it (?!).
// Once the plugin can handle attributes as well, this global can be removed.
window.gM = mw.jqueryMsg.getMessageFunction();

// Expose the jqueryMsg plugin as $( ... ).msg().
$.fn.msg = mw.jqueryMsg.getPlugin();
// Swap the default message parser for jqueryMsg, keeping the previous parser
// around as the fallback for messages that do not need it.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
        // Matches "{{", "[", "<" or ">" anywhere in the message text.
        var wikitextMarkers = /\{\{|[\[<>]/,
                // The message text is only inspected when the format is not 'plain'.
                needsJqueryMsg = this.format !== 'plain' && wikitextMarkers.test( this.map.get( this.key ) );

        // Do not use mw.jqueryMsg unless required.
        if ( !needsJqueryMsg ) {
                // Fall back to mw.msg's simple parser
                return oldParser.apply( this );
        }

        // TODO: should the message function be cached instead of being created on every call?
        // Benchmark this maybe? Caching is somewhat problematic: different maps need different
        // message functions, so the parser would have to be cached as a member of this.map,
        // which sounds a bit ugly.
        return mw.jqueryMsg.getMessageFunction( {
                'messages': this.map,
                // For format 'escaped', escaping part is handled by mediawiki.js
                'format': this.format
        } )( this.key, this.parameters );
};
1148 }( mediaWiki, jQuery ) );