2 * Experimental advanced wikitext parser-emitter.
3 * See: http://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
5 * @author neilk@wikimedia.org
8 ( function( mw
, $, undefined ) {
13 * Given parser options, return a function that parses a key and replacements, returning jQuery object
14 * @param {Object} parser options
15 * @return {Function} accepting ( String message key, String replacement1, String replacement2 ... ) and returning {jQuery}
17 function getFailableParserFn( options
) {
18 var parser
= new mw
.jqueryMsg
.parser( options
);
20 * Try to parse a key and optional replacements, returning a jQuery object that may be a tree of jQuery nodes.
21 * If there was an error parsing, return the key and the error message (wrapped in jQuery). This should put the error right into
22 * the interface, without causing the page to halt script execution, and it hopefully should be clearer how to fix it.
24 * @param {Array} first element is the key, replacements may be in array in 2nd element, or remaining elements.
27 return function( args
) {
29 var argsArray
= $.isArray( args
[1] ) ? args
[1] : $.makeArray( args
).slice( 1 );
30 var escapedArgsArray
= $.map( argsArray
, function( arg
) {
31 return typeof arg
=== 'string' ? mw
.html
.escape( arg
) : arg
;
34 return parser
.parse( key
, escapedArgsArray
);
36 return $( '<span></span>' ).append( key
+ ': ' + e
.message
);
/**
 * Builds a function that renders a message key (plus optional replacements) to an HTML string.
 * The result is suitable for use as a global, e.g.:
 *
 *    window.gM = mw.jqueryMsg.getMessageFunction( options );
 *    $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Because the rendered message is flattened to a string, any bindings on jQuery replacements
 * are lost. Use the jQuery plugin version when bindings must survive. This exists only for
 * backwards compatibility with the old gM().
 *
 * @param {Object} options parser options
 * @return {Function} function suitable for assigning to window.gM
 */
mw.jqueryMsg.getMessageFunction = function( options ) {
	var parseOrReportError = getFailableParserFn( options );

	/**
	 * N.B. replacements are variadic arguments or an array in the second parameter, i.e. both
	 *    somefunction( a, b, c, d )
	 * and
	 *    somefunction( a, [b, c, d] )
	 * are accepted.
	 *
	 * @param {String} message key
	 * @param {Array} optional replacements (can also specify variadically)
	 * @return {String} rendered HTML as string
	 */
	return function( /* key, replacements */ ) {
		var $rendered = parseOrReportError( arguments );
		return $rendered.html();
	};
};
/**
 * Builds a jQuery plugin that parses the message for a key, applies optional replacements,
 * and puts the resulting nodes into the selected elements (which are emptied first).
 * Bindings on passed-in jQuery elements are preserved. Functions become click handlers
 * for [$1 linktext] links.
 *
 *    $.fn.msg = mw.jqueryMsg.getPlugin( options );
 *    var userlink = $( '<a>' ).click( function() { alert( "hello!!") } );
 *    $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * @param {Object} options parser options
 * @return {Function} function suitable for assigning to a jQuery plugin, such as $.fn.msg
 */
mw.jqueryMsg.getPlugin = function( options ) {
	var parseOrReportError = getFailableParserFn( options );

	/**
	 * N.B. replacements are variadic arguments or an array in the second parameter, i.e. both
	 *    somefunction( a, b, c, d )
	 * and
	 *    somefunction( a, [b, c, d] )
	 * are accepted.
	 *
	 * Nodes are appended to 'this', which in a jQuery plugin context is the selected elements.
	 * @param {String} message key
	 * @param {Array} optional replacements (can also specify variadically)
	 * @return {jQuery} this
	 */
	return function( /* key, replacements */ ) {
		// Clear the target before parsing, then move the parsed child nodes over one by one.
		var $container = this.empty();
		var $parsed = parseOrReportError( arguments );
		$.each( $parsed.contents(), function( index, child ) {
			$container.append( child );
		} );
		return $container;
	};
};
105 var parserDefaults
= {
107 'messages' : mw
.messages
,
108 'language' : mw
.language
/**
 * Constructor for an object whose primary duty is to .parse() message keys.
 * The given options are layered over parserDefaults; the resulting language and
 * magic settings are handed to a new htmlEmitter.
 * @param {Object} options
 */
mw.jqueryMsg.parser = function( options ) {
	var merged = $.extend( {}, parserDefaults, options );
	this.settings = merged;
	this.emitter = new mw.jqueryMsg.htmlEmitter( merged.language, merged.magic );
};
121 mw
.jqueryMsg
.parser
.prototype = {
123 // cache, map of mediaWiki message key to the AST of the message. In most cases, the message is a string so this is identical.
124 // (This is why we would like to move this functionality server-side).
128 * Where the magic happens.
129 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
130 * If an error is thrown, returns original key, and logs the error
131 * @param {String} message key
132 * @param {Array} replacements for $1, $2... $n
135 parse: function( key
, replacements
) {
136 return this.emitter
.emit( this.getAst( key
), replacements
);
140 * Fetch the message string associated with a key, return parsed structure. Memoized.
141 * Note that we pass '[' + key + ']' back for a missing message here.
142 * @param {String} key
143 * @return {String|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
145 getAst: function( key
) {
146 if ( this.astCache
[ key
] === undefined ) {
147 var wikiText
= this.settings
.messages
.get( key
);
148 if ( typeof wikiText
!== 'string' ) {
149 wikiText
= "\\[" + key
+ "\\]";
151 this.astCache
[ key
] = this.wikiTextToAst( wikiText
);
153 return this.astCache
[ key
];
157 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
159 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
160 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
162 * @param {String} message string wikitext
164 * @return {Mixed} abstract syntax tree
166 wikiTextToAst: function( input
) {
168 // Indicates current position in input as we parse through it.
169 // Shared among all parsing functions below.
172 // =========================================================
173 // parsing combinators - could be a library on its own
174 // =========================================================
177 // Try parsers until one works, if none work return null
178 function choice( ps
) {
180 for ( var i
= 0; i
< ps
.length
; i
++ ) {
181 var result
= ps
[i
]();
182 if ( result
!== null ) {
190 // try several ps in a row, all must succeed or return null
191 // this is the only eager one
192 function sequence( ps
) {
193 var originalPos
= pos
;
195 for ( var i
= 0; i
< ps
.length
; i
++ ) {
197 if ( res
=== null ) {
206 // run the same parser over and over until it fails.
207 // must succeed a minimum of n times or return null
208 function nOrMore( n
, p
) {
210 var originalPos
= pos
;
213 while ( parsed
!== null ) {
214 result
.push( parsed
);
217 if ( result
.length
< n
) {
225 // There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
226 // But using this as a combinator seems to cause problems when combined with nOrMore().
227 // May be some scoping issue
228 function transform( p
, fn
) {
231 return result
=== null ? null : fn( result
);
235 // Helpers -- just make ps out of simpler JS builtin types
237 function makeStringParser( s
) {
241 if ( input
.substr( pos
, len
) === s
) {
249 function makeRegexParser( regex
) {
251 var matches
= input
.substr( pos
).match( regex
);
252 if ( matches
=== null ) {
255 pos
+= matches
[0].length
;
262 * ===================================================================
263 * General patterns above this line -- wikitext specific parsers below
264 * ===================================================================
267 // Parsing functions follow. All parsing functions work like this:
268 // They don't accept any arguments.
269 // Instead, they just operate non destructively on the string 'input'
270 // As they can consume parts of the string, they advance the shared variable pos,
271 // and return tokens (or whatever else they want to return).
273 // some things are defined as closures and other things as ordinary functions
274 // converting everything to a closure makes it a lot harder to debug... errors pop up
275 // but some debuggers can't tell you exactly where they come from. Also the mutually
276 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
277 // This may be because, to save code, memoization was removed
280 var regularLiteral
= makeRegexParser( /^[^{}[\]$\\]/ );
281 var regularLiteralWithoutBar
= makeRegexParser(/^[^{}[\]$\\|]/);
282 var regularLiteralWithoutSpace
= makeRegexParser(/^[^{}[\]$\s]/);
284 var backslash
= makeStringParser( "\\" );
285 var anyCharacter
= makeRegexParser( /^./ );
287 function escapedLiteral() {
288 var result
= sequence( [
292 return result
=== null ? null : result
[1];
295 var escapedOrLiteralWithoutSpace
= choice( [
297 regularLiteralWithoutSpace
300 var escapedOrLiteralWithoutBar
= choice( [
302 regularLiteralWithoutBar
305 var escapedOrRegularLiteral
= choice( [
310 // Used to define "literals" without spaces, in space-delimited situations
311 function literalWithoutSpace() {
312 var result
= nOrMore( 1, escapedOrLiteralWithoutSpace
)();
313 return result
=== null ? null : result
.join('');
316 // Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
317 // it is not a literal in the parameter
318 function literalWithoutBar() {
319 var result
= nOrMore( 1, escapedOrLiteralWithoutBar
)();
320 return result
=== null ? null : result
.join('');
324 var result
= nOrMore( 1, escapedOrRegularLiteral
)();
325 return result
=== null ? null : result
.join('');
328 var whitespace
= makeRegexParser( /^\s+/ );
329 var dollar
= makeStringParser( '$' );
330 var digits
= makeRegexParser( /^\d+/ );
332 function replacement() {
333 var result
= sequence( [
337 if ( result
=== null ) {
340 return [ 'REPLACE', parseInt( result
[1], 10 ) - 1 ];
344 var openExtlink
= makeStringParser( '[' );
345 var closeExtlink
= makeStringParser( ']' );
347 // this extlink MUST have inner text, e.g. [foo] not allowed; [foo bar] is allowed
350 var parsedResult
= sequence( [
352 nonWhitespaceExpression
,
357 if ( parsedResult
!== null ) {
358 result
= [ 'LINK', parsedResult
[1], parsedResult
[3] ];
363 var openLink
= makeStringParser( '[[' );
364 var closeLink
= makeStringParser( ']]' );
368 var parsedResult
= sequence( [
373 if ( parsedResult
!== null ) {
374 result
= [ 'WLINK', parsedResult
[1] ];
379 var templateName
= transform(
380 // see $wgLegalTitleChars
381 // not allowing : due to the need to catch "PLURAL:$1"
382 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+-]+/ ),
383 function( result
) { return result
.toString(); }
386 function templateParam() {
387 var result
= sequence( [
389 nOrMore( 0, paramExpression
)
391 if ( result
=== null ) {
394 var expr
= result
[1];
395 // use a "CONCAT" operator if there are multiple nodes, otherwise return the first node, raw.
396 return expr
.length
> 1 ? [ "CONCAT" ].concat( expr
) : expr
[0];
399 var pipe
= makeStringParser( '|' );
401 function templateWithReplacement() {
402 var result
= sequence( [
407 return result
=== null ? null : [ result
[0], result
[2] ];
410 var colon
= makeStringParser(':');
412 var templateContents
= choice( [
414 var res
= sequence( [
415 templateWithReplacement
,
416 nOrMore( 0, templateParam
)
418 return res
=== null ? null : res
[0].concat( res
[1] );
421 var res
= sequence( [
423 nOrMore( 0, templateParam
)
425 if ( res
=== null ) {
428 return [ res
[0] ].concat( res
[1] );
432 var openTemplate
= makeStringParser('{{');
433 var closeTemplate
= makeStringParser('}}');
435 function template() {
436 var result
= sequence( [
441 return result
=== null ? null : result
[1];
444 var nonWhitespaceExpression
= choice( [
452 var paramExpression
= choice( [
460 var expression
= choice( [
469 var result
= nOrMore( 0, expression
)();
470 if ( result
=== null ) {
473 return [ "CONCAT" ].concat( result
);
476 // everything above this point is supposed to be stateless/static, but
477 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
479 // finally let's do some actual work...
481 var result
= start();
484 * For success, the p must have gotten to the end of the input
485 * and returned a non-null.
486 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
488 if (result
=== null || pos
!== input
.length
) {
489 throw new Error( "Parse error at position " + pos
.toString() + " in input: " + input
);
497 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
499 mw
.jqueryMsg
.htmlEmitter = function( language
, magic
) {
500 this.language
= language
;
503 $.each( magic
, function( key
, val
) {
504 _this
[ key
.toLowerCase() ] = function() { return val
; };
508 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
509 * Walk entire node structure, applying replacements and template functions when appropriate
510 * @param {Mixed} abstract syntax tree (top node or subnode)
511 * @param {Array} replacements for $1, $2, ... $n
512 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
514 this.emit = function( node
, replacements
) {
517 switch( typeof node
) {
522 case 'object': // node is an array of nodes
523 var subnodes
= $.map( node
.slice( 1 ), function( n
) {
524 return _this
.emit( n
, replacements
);
526 var operation
= node
[0].toLowerCase();
527 if ( typeof _this
[operation
] === 'function' ) {
528 ret
= _this
[ operation
]( subnodes
, replacements
);
530 throw new Error( 'unknown operation "' + operation
+ '"' );
534 // Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
535 // Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
536 // The logical thing is probably to return the empty string here when we encounter undefined.
540 throw new Error( 'unexpected type in AST: ' + typeof node
);
547 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
548 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
549 // If you have 'magic words' then configure the parser to have them upon creation.
551 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
552 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
553 mw
.jqueryMsg
.htmlEmitter
.prototype = {
556 * Parsing has been applied depth-first, so we can assume that all nodes here are single nodes
557 * Must return a single node to parents -- a jQuery with synthetic span
558 * However, unwrap any other synthetic spans in our children and pass them upwards
559 * @param {Array} nodes - mixed, some single nodes, some arrays of nodes
562 concat: function( nodes
) {
563 var span
= $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
564 $.each( nodes
, function( i
, node
) {
565 if ( node
instanceof jQuery
&& node
.hasClass( 'mediaWiki_htmlEmitter' ) ) {
566 $.each( node
.contents(), function( j
, childNode
) {
567 span
.append( childNode
);
570 // strings, integers, anything else
578 * Return replacement of correct index, or string if unavailable.
579 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
580 * if the specified parameter is not found return the same string
581 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
582 * TODO throw error if nodes.length > 1 ?
583 * @param {Array} of one element, integer, n >= 0
584 * @return {String} replacement
586 replace: function( nodes
, replacements
) {
587 var index
= parseInt( nodes
[0], 10 );
588 return index
< replacements
.length
? replacements
[index
] : '$' + ( index
+ 1 );
592 * Transform wiki-link
595 wlink: function( nodes
) {
596 return "unimplemented";
600 * Transform parsed structure into external link
601 * If the href is a jQuery object, treat it as "enclosing" the link text.
602 * ... function, treat it as the click handler
603 * ... string, treat it as a URI
604 * TODO: throw an error if nodes.length > 2 ?
605 * @param {Array} of two elements, {jQuery|Function|String} and {String}
608 link: function( nodes
) {
610 var contents
= nodes
[1];
612 if ( arg
instanceof jQuery
) {
616 if ( typeof arg
=== 'function' ) {
617 $el
.click( arg
).attr( 'href', '#' );
619 $el
.attr( 'href', arg
.toString() );
622 $el
.append( contents
);
627 * Transform parsed structure into pluralization
628 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
629 * So convert it back with the current language's convertNumber.
630 * @param {Array} of nodes, [ {String|Number}, {String}, {String} ... ]
631 * @return {String} selected pluralized form according to current language
633 plural: function( nodes
) {
634 var count
= parseInt( this.language
.convertNumber( nodes
[0], true ), 10 );
635 var forms
= nodes
.slice(1);
636 return forms
.length
? this.language
.convertPlural( count
, forms
) : '';
640 * Transform parsed structure into gender
641 * Usage {{gender:[gender| mw.user object ] | masculine|feminine|neutral}}.
642 * @param {Array} of nodes, [ {String|mw.User}, {String}, {String} , {String} ]
643 * @return {String} selected gender form according to current language
645 gender: function( nodes
) {
647 if ( nodes
[0] && nodes
[0].options
instanceof mw
.Map
){
648 gender
= nodes
[0].options
.get( 'gender' );
652 var forms
= nodes
.slice(1);
653 return this.language
.gender( gender
, forms
);
658 // TODO figure out a way to make magic work with common globals like wgSiteName, without requiring init from library users...
659 // var options = { magic: { 'SITENAME' : mw.config.get( 'wgSiteName' ) } };
661 // deprecated! don't rely on gM existing.
662 // the window.gM ought not to be required - or if required, not required here. But moving it to extensions breaks it (?!)
663 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
window.gM = mw.jqueryMsg.getMessageFunction();

$.fn.msg = mw.jqueryMsg.getPlugin();

// Replace the default message parser with jqueryMsg
var oldParser = mw.Message.prototype.parser;

/**
 * Message parser that only brings in jqueryMsg when the message text contains '{{';
 * everything else is delegated to the parser that was installed before this module loaded.
 * @return {Mixed} result of the previous parser, or the HTML string rendered by jqueryMsg
 */
mw.Message.prototype.parser = function() {
	// TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
	// Caching is somewhat problematic, because we do need different message functions for different maps, so
	// we'd have to cache the parser as a member of this.map, which sounds a bit ugly.

	var messageText = this.map.get( this.key );

	// Do not use mw.jqueryMsg unless required.
	// A missing message does not yield a string; calling indexOf on it would throw, so such
	// keys are left to the previous parser as well.
	if ( typeof messageText !== 'string' || messageText.indexOf( '{{' ) < 0 ) {
		// Fall back to mw.msg's simple parser
		return oldParser.apply( this );
	}

	var messageFunction = mw.jqueryMsg.getMessageFunction( { 'messages': this.map } );
	return messageFunction( this.key, this.parameters );
};
685 } )( mediaWiki
, jQuery
);