Merge "Update the documentation at the top of parserTests.txt"
[lhc/web/wiklou.git] / resources / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
// Declared for later use; nothing in the visible part of this file assigns it.
var oldParser;

// Shorthand for turning array-likes (such as `arguments`) into real arrays.
var slice = Array.prototype.slice;

// Default parser settings. Options handed to mw.jqueryMsg.parser are merged
// on top of these.
var parserDefaults = {
	// Magic words, and the fixed value each one expands to.
	magic: {
		SITENAME: mw.config.get( 'wgSiteName' )
	},

	// Whitelist of element names that may appear as HTML in a message.
	// Based on, but simpler than, Sanitizer.php.
	// Self-closing tags are not currently supported.
	allowedHtmlElements: [
		'b',
		'i'
	],

	// Attribute names accepted on every whitelisted element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Extra attributes accepted on specific elements only.
	// Key: element name in lower case.
	// Value: array of attribute names allowed for that element.
	allowedHtmlAttributesByElement: {},

	// Message store and language helpers used by the parser and emitter.
	messages: mw.messages,
	language: mw.language,

	// Same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the
	// actual escaping for 'escaped' is done by other code (generally
	// through mediawiki.js).
	//
	// However, note that this default only applies to direct calls to
	// jqueryMsg. The default for mediawiki.js itself is 'text', including
	// when it uses jqueryMsg.
	format: 'parse'
};
60
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The nodes to append are collected in a fresh array, so an array passed in by the
 * caller is never modified.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		list = $.isArray( children ) ? children : [ children ],
		nodes = [];

	for ( i = 0, len = list.length; i < len; i++ ) {
		child = list[i];
		// Anything that is not already an object (strings, numbers, ...) becomes a
		// text node, so it can never be interpreted as markup by append().
		nodes.push( typeof child === 'object' ? child : document.createTextNode( child ) );
	}

	return $parent.append( nodes );
}
87
/**
 * Turns the entities produced by mw.html.escape back into plain characters.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var i,
		decoded = encoded,
		// Applied in order. '&amp;' is handled last so that an escaped
		// ampersand is not decoded a second time.
		entities = [
			[ /&#039;/g, '\'' ],
			[ /&quot;/g, '"' ],
			[ /&lt;/g, '<' ],
			[ /&gt;/g, '>' ],
			[ /&amp;/g, '&' ]
		];

	for ( i = 0; i < entities.length; i++ ) {
		decoded = decoded.replace( entities[i][0], entities[i][1] );
	}

	return decoded;
}
103
/**
 * Builds a parser for the given options and returns a function that parses a
 * key plus replacements into a jQuery object.
 *
 * The returned function never throws because of a parse failure: when parsing
 * fails it returns a span whose text is the key followed by the error message.
 * This puts the error right into the interface without halting script
 * execution, and hopefully makes it clearer how to fix it.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	var parser = new mw.jqueryMsg.parser( options );

	function parseOrReportError( args ) {
		var replacements,
			key = args[0];

		// Replacements come either as one array or as the remaining arguments.
		if ( $.isArray( args[1] ) ) {
			replacements = args[1];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, replacements );
		} catch ( e ) {
			return $( '<span>' ).text( key + ': ' + e.message );
		}
	}

	return parseOrReportError;
}
129
mw.jqueryMsg = {};

/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mw.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The output format is options.format when given, otherwise the parser default.
 * For 'text' and 'escaped' the text content of the parse result is returned;
 * for any other format its inner HTML.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn = getFailableParserFn( options ),
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format;

	return function () {
		var $result = failableParserFn( arguments ),
			textOnly = ( format === 'text' || format === 'escaped' );

		return textOnly ? $result.text() : $result.html();
	};
};
172
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *        $.fn.msg = mw.jqueryMsg.getPlugin( options );
 *        var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *        $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin empties 'this' (in a jQuery plugin context, the selected elements)
 * and then appends the parsed nodes to it.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn = getFailableParserFn( options );

	return function () {
		var i, len,
			$target = this.empty(),
			nodes = failableParserFn( arguments ).contents();

		// TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
		// or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
		for ( i = 0, len = nodes.length; i < len; i++ ) {
			appendWithoutParsing( $target, nodes[i] );
		}

		return $target;
	};
};
208
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The given options are layered over the parser defaults. For the 'text' and
 * 'escaped' formats only curly-brace transformation ({{...}} and $n) is done.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
223
224 mw.jqueryMsg.parser.prototype = {
225 /**
226 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
227 *
228 * In most cases, the message is a string so this is identical.
229 * (This is why we would like to move this functionality server-side).
230 *
231 * The two parts of the key are separated by colon. For example:
232 *
233 * "message-key:true": ast
234 *
 * if the key is "message-key" and onlyCurlyBraceTransform is true.
236 *
237 * This cache is shared by all instances of mw.jqueryMsg.parser.
238 *
239 * NOTE: We promise, it's static - when you create this empty object
240 * in the prototype, each new instance of the class gets a reference
241 * to the same object.
242 *
243 * @static
244 * @property {Object}
245 */
246 astCache: {},
247
248 /**
249 * Where the magic happens.
250 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
251 * If an error is thrown, returns original key, and logs the error
252 * @param {string} key Message key.
253 * @param {Array} replacements Variable replacements for $1, $2... $n
254 * @return {jQuery}
255 */
256 parse: function ( key, replacements ) {
257 return this.emitter.emit( this.getAst( key ), replacements );
258 },
259
260 /**
261 * Fetch the message string associated with a key, return parsed structure. Memoized.
262 * Note that we pass '[' + key + ']' back for a missing message here.
263 * @param {string} key
264 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
265 */
266 getAst: function ( key ) {
267 var cacheKey = [key, this.settings.onlyCurlyBraceTransform].join( ':' ), wikiText;
268
269 if ( this.astCache[ cacheKey ] === undefined ) {
270 wikiText = this.settings.messages.get( key );
271 if ( typeof wikiText !== 'string' ) {
272 wikiText = '\\[' + key + '\\]';
273 }
274 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
275 }
276 return this.astCache[ cacheKey ];
277 },
278
279 /**
280 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
281 *
282 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
283 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
284 *
285 * @param {string} input Message string wikitext
286 * @throws Error
287 * @return {Mixed} abstract syntax tree
288 */
289 wikiTextToAst: function ( input ) {
290 var pos, settings = this.settings, concat = Array.prototype.concat,
291 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
292 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
293 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
294 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
295 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
296 openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
297 templateContents, openTemplate, closeTemplate,
298 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result;
299
300 // Indicates current position in input as we parse through it.
301 // Shared among all parsing functions below.
302 pos = 0;
303
304 // =========================================================
305 // parsing combinators - could be a library on its own
306 // =========================================================
307
/**
 * Builds a parser that tries each given parser in order and yields the first
 * result that is not null. Yields null when every parser fails.
 * @private
 * @param {Function[]} ps Parsers to try, in priority order
 * @return {Function} Combined parser
 * @return {Mixed|null} return.return Result of the first succeeding parser, or null
 */
function choice( ps ) {
	return function () {
		var outcome,
			index = 0;

		while ( index < ps.length ) {
			outcome = ps[index]();
			if ( outcome !== null ) {
				return outcome;
			}
			index++;
		}

		return null;
	};
}
326
/**
 * Runs the given parsers one after another; every one of them has to succeed.
 * On the first failure the shared position is rewound to where this call
 * started and null is returned.
 * This is the only eager combinator: it runs immediately instead of returning
 * a parser function.
 * @private
 * @param {Function[]} ps Parsers to run in order
 * @return {Array|null} Results of all parsers in order, or null on failure
 */
function sequence( ps ) {
	var parsed,
		index = 0,
		startPos = pos,
		collected = [];

	while ( index < ps.length ) {
		parsed = ps[index]();
		if ( parsed === null ) {
			pos = startPos;
			return null;
		}
		collected.push( parsed );
		index++;
	}

	return collected;
}
348
/**
 * Builds a parser that applies p repeatedly until it fails.
 * If p succeeded fewer than n times, the shared position is rewound and the
 * built parser yields null.
 * @private
 * @param {number} n Minimum number of successful matches
 * @param {Function} p Parser to repeat
 * @return {Function} Repeating parser
 * @return {Array|null} return.return Results of every successful run of p, or null
 */
function nOrMore( n, p ) {
	return function () {
		var parsed,
			startPos = pos,
			matches = [];

		for ( parsed = p(); parsed !== null; parsed = p() ) {
			matches.push( parsed );
		}

		if ( matches.length < n ) {
			pos = startPos;
			return null;
		}

		return matches;
	};
}
373
/**
 * General pattern: run a parser and, if it produced something, pass the result
 * through fn; a null result is passed on untouched.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser to run
 * @param {Function} fn Applied to the non-null result of p
 * @return {Function} Transforming parser
 * @return {Mixed|null} return.return fn( result of p ), or null when p fails
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();

		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
391
/**
 * Makes a parser that matches one fixed string at the current position.
 * On a match the shared position moves past the string.
 * @private
 * @param {string} s String to match
 * @return {Function}
 * @return {string|null} return.return s when matched, null otherwise
 */
function makeStringParser( s ) {
	var len = s.length;

	return function () {
		if ( input.substr( pos, len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
410
/**
 * Makes a parser out of a RegExp object.
 * The regex is matched against the input from the current position onwards,
 * so it should start with a ^ to anchor it to that position. The shared
 * position advances by the length of the matched text.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 * @return {string|null} return.return Matched text, or null when there is no match
 */
function makeRegexParser( regex ) {
	return function () {
		var remaining = input.substr( pos ),
			matches = remaining.match( regex );

		if ( matches !== null ) {
			pos += matches[0].length;
			return matches[0];
		}
		return null;
	};
}
430
431 // ===================================================================
432 // General patterns above this line -- wikitext specific parsers below
433 // ===================================================================
434
435 // Parsing functions follow. All parsing functions work like this:
436 // They don't accept any arguments.
437 // Instead, they just operate non destructively on the string 'input'
438 // As they can consume parts of the string, they advance the shared variable pos,
439 // and return tokens (or whatever else they want to return).
440 // some things are defined as closures and other things as ordinary functions
441 // converting everything to a closure makes it a lot harder to debug... errors pop up
442 // but some debuggers can't tell you exactly where they come from. Also the mutually
443 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
444 // This may be because, to save code, memoization was removed
445
446 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
447 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
448 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
449 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
450
451 backslash = makeStringParser( '\\' );
452 doubleQuote = makeStringParser( '"' );
453 singleQuote = makeStringParser( '\'' );
454 anyCharacter = makeRegexParser( /^./ );
455
456 openHtmlStartTag = makeStringParser( '<' );
457 optionalForwardSlash = makeRegexParser( /^\/?/ );
458 openHtmlEndTag = makeStringParser( '</' );
459 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
460 closeHtmlTag = makeRegexParser( /^\s*>/ );
461
// A backslash followed by any single character; yields that character.
function escapedLiteral() {
	var pair = sequence( [
		backslash,
		anyCharacter
	] );

	if ( pair === null ) {
		return null;
	}
	return pair[1];
}

escapedOrLiteralWithoutSpace = choice( [
	escapedLiteral,
	regularLiteralWithoutSpace
] );
escapedOrLiteralWithoutBar = choice( [
	escapedLiteral,
	regularLiteralWithoutBar
] );
escapedOrRegularLiteral = choice( [
	escapedLiteral,
	regularLiteral
] );

// Shared by the literal parsers below: match p one or more times and glue
// the pieces into a single string, or yield null when p never matches.
function joinOneOrMore( p ) {
	var pieces = nOrMore( 1, p )();

	if ( pieces === null ) {
		return null;
	}
	return pieces.join( '' );
}

// Used to define "literals" without spaces, in space-delimited situations
function literalWithoutSpace() {
	return joinOneOrMore( escapedOrLiteralWithoutSpace );
}

// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter
function literalWithoutBar() {
	return joinOneOrMore( escapedOrLiteralWithoutBar );
}

// Used for wikilink page names. Like literalWithoutBar, but
// without allowing escapes.
function unescapedLiteralWithoutBar() {
	return joinOneOrMore( regularLiteralWithoutBar );
}

function literal() {
	return joinOneOrMore( escapedOrRegularLiteral );
}

function curlyBraceTransformExpressionLiteral() {
	return joinOneOrMore( regularLiteralWithSquareBrackets );
}
509
// One or more ASCII letters (used for HTML tag and attribute names).
// The regex must be anchored with ^: makeRegexParser matches against the rest
// of the input, and without the anchor letters found anywhere later in the
// input would count as a match at the current position.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
// Attribute value bodies: everything up to the closing quote (may be empty).
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );

whitespace = makeRegexParser( /^\s+/ );
dollar = makeStringParser( '$' );
digits = makeRegexParser( /^\d+/ );
517
// A placeholder such as $1. Yields [ 'REPLACE', index ] where the index is
// zero-based, i.e. $1 becomes 0.
function replacement() {
	var parts = sequence( [
		dollar,
		digits
	] );

	return parts === null ? null : [ 'REPLACE', parseInt( parts[1], 10 ) - 1 ];
}
openExtlink = makeStringParser( '[' );
closeExtlink = makeStringParser( ']' );

// External link: [target contents]
// This extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
function extlink() {
	var target, contents,
		parts = sequence( [
			openExtlink,
			nonWhitespaceExpression,
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parts === null ) {
		return null;
	}

	target = parts[1];
	contents = parts[3];

	// TODO (mattflaschen, 2013-03-22): Clean this up if possible.
	// A single content node is passed on as is; CONCAT is only used for
	// several nodes, so single nodes at least don't get the htmlEmitter span.
	if ( contents.length === 1 ) {
		return [ 'EXTLINK', target, contents[0] ];
	}
	return [ 'EXTLINK', target, [ 'CONCAT' ].concat( contents ) ];
}
// Same as extlink, except that the URL is passed in as a parameter: [$1 contents]
// Yields [ 'EXTLINKPARAM', zero-based parameter index, contents ].
function extLinkParam() {
	var parts = sequence( [
		openExtlink,
		dollar,
		digits,
		whitespace,
		expression,
		closeExtlink
	] );

	return parts === null ?
		null :
		[ 'EXTLINKPARAM', parseInt( parts[2], 10 ) - 1, parts[4] ];
}
openWikilink = makeStringParser( '[[' );
closeWikilink = makeStringParser( ']]' );
pipe = makeStringParser( '|' );

// {{ ... }}; yields whatever templateContents produced.
function template() {
	var parts = sequence( [
		openTemplate,
		templateContents,
		closeTemplate
	] );

	if ( parts === null ) {
		return null;
	}
	return parts[1];
}

// The link target: plain text (no escapes) or a template.
wikilinkPage = choice( [
	unescapedLiteralWithoutBar,
	template
] );

// page|anchor; yields [ page, anchor ].
function pipedWikilink() {
	var parts = sequence( [
		wikilinkPage,
		pipe,
		expression
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}

wikilinkContents = choice( [
	pipedWikilink,
	wikilinkPage // unpiped link
] );

// [[page]] or [[page|anchor]]; yields [ 'WIKILINK', page ] or
// [ 'WIKILINK', page, anchor ].
function wikilink() {
	var parts = sequence( [
		openWikilink,
		wikilinkContents,
		closeWikilink
	] );

	if ( parts === null ) {
		return null;
	}
	return [ 'WIKILINK' ].concat( parts[1] );
}
615
// Shared by the two quoted-value parsers: quote, value body, same quote again.
// Yields the value body without the quotes.
function quotedHtmlAttributeValue( quote, valueBody ) {
	var parts = sequence( [
		quote,
		valueBody,
		quote
	] );

	return parts === null ? null : parts[1];
}

// TODO: Support data- if appropriate
function doubleQuotedHtmlAttributeValue() {
	return quotedHtmlAttributeValue( doubleQuote, htmlDoubleQuoteAttributeValue );
}

function singleQuotedHtmlAttributeValue() {
	return quotedHtmlAttributeValue( singleQuote, htmlSingleQuoteAttributeValue );
}

// Whitespace, attribute name, '=', quoted value. Yields [ name, value ].
function htmlAttribute() {
	var parts = sequence( [
		whitespace,
		asciiAlphabetLiteral,
		htmlAttributeEquals,
		choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] )
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[1], parts[3] ];
}
647
/**
 * Checks if HTML is allowed
 *
 * The element is allowed when the start and end tag names are equal (ignoring
 * case), the name is a whitelisted element, and every attribute name is
 * whitelisted either for all elements or for this element.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Array} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var i, name,
		count = attributes.length,
		startTag = startTagName.toLowerCase(),
		endTag = endTagName.toLowerCase();

	if ( startTag !== endTag ) {
		return false;
	}
	if ( $.inArray( startTag, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; the values are not inspected here.
	for ( i = 0; i < count; i += 2 ) {
		name = attributes[i];
		if ( $.inArray( name, settings.allowedHtmlCommonAttributes ) !== -1 ) {
			continue;
		}
		if ( $.inArray( name, settings.allowedHtmlAttributesByElement[startTag] || [] ) !== -1 ) {
			continue;
		}
		return false;
	}

	return true;
}
676
// Zero or more attributes. Yields [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
function htmlAttributes() {
	var pairs = nOrMore( 0, htmlAttribute )();

	// Un-nest the [ name, value ] pairs due to structure of jQueryMsg operations (see emit).
	return concat.apply( [ 'HTMLATTRIBUTES' ], pairs );
}

// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// Parsed in three steps (start tag, contents, end tag) with the input position
// recorded around each tag. That allows accurate reconstruction of the original
// tag text whenever the markup has to be treated as plain text, and allows
// requiring an exact tag name match.
function html() {
	var openTag, contents, closeTag, openTagText, closeTagText,
		wrappedAttributes, closeTagStart,
		openTagStart = pos;

	// 1. '<' through '>' of the start tag
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openTagText = input.substring( openTagStart, pos );

	// 2. contents
	contents = nOrMore( 0, expression )();

	// 3. '</' through '>' of the end tag
	closeTagStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag (as text) and contents.
		return [ 'CONCAT', openTagText ].concat( contents );
	}
	closeTagText = input.substring( closeTagStart, pos );

	// First entry of the wrapped attributes is the 'HTMLATTRIBUTES' marker.
	wrappedAttributes = openTag[2];
	if ( isAllowedHtml( openTag[1], closeTag[1], wrappedAttributes.slice( 1 ) ) ) {
		return [ 'HTMLELEMENT', openTag[1], wrappedAttributes ].concat( contents );
	}

	// HTML is not allowed, so the contents stay as parsed while the markup at
	// this level is treated as text.
	// E.g. assuming script tags are not allowed:
	//
	// <script>[[Foo|bar]]</script>
	//
	// results in the literal text of the start and end script tags (not
	// treated as an HTML tag), surrounding a fully parsed HTML link.
	//
	// Concatenate everything from the tag, flattening the contents.
	return [ 'CONCAT', openTagText ].concat( contents, closeTagText );
}
754
// Name of a template / parser function, e.g. PLURAL or SITENAME.
templateName = transform(
	// see $wgLegalTitleChars
	// not allowing : due to the need to catch "PLURAL:$1"
	makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
	function ( name ) {
		return name.toString();
	}
);

// One '|'-introduced template parameter.
function templateParam() {
	var nodes,
		parts = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );

	if ( parts === null ) {
		return null;
	}

	nodes = parts[1];
	// Several nodes are wrapped in a CONCAT operator; otherwise the first node
	// is returned raw (which is undefined for an empty parameter).
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[0];
}
774
// NAME:$n -- yields [ name, replacement node ]
function templateWithReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		replacement
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}

// NAME:param -- yields [ name, parameter node ]
function templateWithOutReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		paramExpression
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}

// NAME: with nothing after the colon -- yields [ name, '' ]
function templateWithOutFirstParameter() {
	var parts = sequence( [
		templateName,
		colon
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[0], '' ];
}
colon = makeStringParser( ':' );

// Template body with a colon after the name, followed by any number of
// '|'-separated parameters.
// Templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
// or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
function colonTemplateContents() {
	var parts = sequence( [
		choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
		nOrMore( 0, templateParam )
	] );

	if ( parts === null ) {
		return null;
	}
	return parts[0].concat( parts[1] );
}

// Template body without a colon: just a name, followed by any number of
// '|'-separated parameters.
function plainTemplateContents() {
	var parts = sequence( [
		templateName,
		nOrMore( 0, templateParam )
	] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[0] ].concat( parts[1] );
}

templateContents = choice( [
	colonTemplateContents,
	plainTemplateContents
] );

openTemplate = makeStringParser( '{{' );
closeTemplate = makeStringParser( '}}' );

// Alternatives common to the space-delimited, parameter and general
// expressions; they are always tried first and in this order.
var sharedExpressions = [
	template,
	wikilink,
	extLinkParam,
	extlink,
	replacement
];

nonWhitespaceExpression = choice( sharedExpressions.concat( [ literalWithoutSpace ] ) );

paramExpression = choice( sharedExpressions.concat( [ literalWithoutBar ] ) );

expression = choice( sharedExpressions.concat( [ html, literal ] ) );

// Used when only {{-transformation is wanted, for 'text'
// or 'escaped' formats
curlyBraceTransformExpression = choice( [
	template,
	replacement,
	curlyBraceTransformExpressionLiteral
] );
856
/**
 * Starts the parse
 *
 * Applies the root parse function as often as it matches and wraps the
 * resulting nodes in a CONCAT node.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} [ 'CONCAT', node, ... ], or null if the repetition fails
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();

	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
869 // everything above this point is supposed to be stateless/static, but
870 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
871 // finally let's do some actual work...
872
873 // If you add another possible rootExpression, you must update the astCache key scheme.
874 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
875
876 /*
877 * For success, the p must have gotten to the end of the input
878 * and returned a non-null.
879 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
880 */
881 if ( result === null || pos !== input.length ) {
882 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
883 }
884 return result;
885 }
886
887 };
888
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Every magic word is installed on the instance as a method named after the
 * lower-cased word that simply returns the configured value.
 *
 * @param {Object} language Language helpers, stored as this.language
 * @param {Object} magic Map of magic word to the value it expands to
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var jmsg = this;

	this.language = language;

	$.each( magic, function ( key, val ) {
		jmsg[ key.toLowerCase() ] = function () {
			return val;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			emitter = this,
			type = typeof node;

		// Leaves are passed through as they are.
		if ( type === 'string' || type === 'number' ) {
			return node;
		}

		// Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
		// Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
		// The logical thing is probably to return the empty string here when we encounter undefined.
		if ( type === 'undefined' ) {
			return '';
		}

		if ( type !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + typeof node );
		}

		// typeof returns object for arrays: node is [ operation, subnode, ... ].
		// The subnodes are emitted first, then handed to the method named after
		// the lower-cased operation.
		subnodes = $.map( node.slice( 1 ), function ( n ) {
			return emitter.emit( n, replacements );
		} );
		operation = node[0].toLowerCase();
		if ( typeof emitter[operation] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return emitter[ operation ]( subnodes, replacements );
	};
};
941
942 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
943 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
944 // If you have 'magic words' then configure the parser to have them upon creation.
945 //
946 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
947 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
948 mw.jqueryMsg.htmlEmitter.prototype = {
949 /**
950 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
951 * Must return a single node to parents -- a jQuery with synthetic span
952 * However, unwrap any other synthetic spans in our children and pass them upwards
953 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
954 * @return {jQuery}
955 */
956 concat: function ( nodes ) {
957 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
958 $.each( nodes, function ( i, node ) {
959 if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
960 $.each( node.contents(), function ( j, childNode ) {
961 appendWithoutParsing( $span, childNode );
962 } );
963 } else {
964 // Let jQuery append nodes, arrays of nodes and jQuery objects
965 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
966 appendWithoutParsing( $span, node );
967 }
968 } );
969 return $span;
970 },
971
972 /**
973 * Return escaped replacement of correct index, or string if unavailable.
974 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
975 * if the specified parameter is not found return the same string
976 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
977 *
978 * TODO: Throw error if nodes.length > 1 ?
979 *
980 * @param {Array} nodes List of one element, integer, n >= 0
981 * @param {Array} replacements List of at least n strings
982 * @return {String} replacement
983 */
984 replace: function ( nodes, replacements ) {
985 var index = parseInt( nodes[0], 10 );
986
987 if ( index < replacements.length ) {
988 return replacements[index];
989 } else {
990 // index not found, fallback to displaying variable
991 return '$' + ( index + 1 );
992 }
993 },
994
995 /**
996 * Transform wiki-link
997 *
998 * TODO:
999 * It only handles basic cases, either no pipe, or a pipe with an explicit
1000 * anchor.
1001 *
1002 * It does not attempt to handle features like the pipe trick.
1003 * However, the pipe trick should usually not be present in wikitext retrieved
1004 * from the server, since the replacement is done at save time.
1005 * It may, though, if the wikitext appears in extension-controlled content.
1006 *
1007 * @param nodes
1008 */
1009 wikilink: function ( nodes ) {
1010 var page, anchor, url;
1011
1012 page = nodes[0];
1013 url = mw.util.getUrl( page );
1014
1015 // [[Some Page]] or [[Namespace:Some Page]]
1016 if ( nodes.length === 1 ) {
1017 anchor = page;
1018 }
1019
1020 /*
1021 * [[Some Page|anchor text]] or
1022 * [[Namespace:Some Page|anchor]
1023 */
1024 else {
1025 anchor = nodes[1];
1026 }
1027
1028 return $( '<a>' ).attr( {
1029 title: page,
1030 href: url
1031 } ).text( anchor );
1032 },
1033
1034 /**
1035 * Converts array of HTML element key value pairs to object
1036 *
1037 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1038 * name and 2 * n + 1 the associated value
1039 * @return {Object} Object mapping attribute name to attribute value
1040 */
1041 htmlattributes: function ( nodes ) {
1042 var i, len, mapping = {};
1043 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1044 mapping[nodes[i]] = decodePrimaryHtmlEntities( nodes[i + 1] );
1045 }
1046 return mapping;
1047 },
1048
1049 /**
1050 * Handles an (already-validated) HTML element.
1051 *
1052 * @param {Array} nodes Nodes to process when creating element
1053 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1054 */
1055 htmlelement: function ( nodes ) {
1056 var tagName, attributes, contents, $element;
1057
1058 tagName = nodes.shift();
1059 attributes = nodes.shift();
1060 contents = nodes;
1061 $element = $( document.createElement( tagName ) ).attr( attributes );
1062 return appendWithoutParsing( $element, contents );
1063 },
1064
1065 /**
1066 * Transform parsed structure into external link
1067 * If the href is a jQuery object, treat it as "enclosing" the link text.
1068 *
1069 * - ... function, treat it as the click handler.
1070 * - ... string, treat it as a URI.
1071 *
1072 * TODO: throw an error if nodes.length > 2 ?
1073 *
1074 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {String}
1075 * @return {jQuery}
1076 */
1077 extlink: function ( nodes ) {
1078 var $el,
1079 arg = nodes[0],
1080 contents = nodes[1];
1081 if ( arg instanceof jQuery ) {
1082 $el = arg;
1083 } else {
1084 $el = $( '<a>' );
1085 if ( typeof arg === 'function' ) {
1086 $el.click( arg ).attr( 'href', '#' );
1087 } else {
1088 $el.attr( 'href', arg.toString() );
1089 }
1090 }
1091 return appendWithoutParsing( $el, contents );
1092 },
1093
1094 /**
1095 * This is basically use a combination of replace + external link (link with parameter
1096 * as url), but we don't want to run the regular replace here-on: inserting a
1097 * url as href-attribute of a link will automatically escape it already, so
1098 * we don't want replace to (manually) escape it as well.
1099 *
1100 * TODO: throw error if nodes.length > 1 ?
1101 *
1102 * @param {Array} nodes List of one element, integer, n >= 0
1103 * @param {Array} replacements List of at least n strings
1104 * @return {string} replacement
1105 */
1106 extlinkparam: function ( nodes, replacements ) {
1107 var replacement,
1108 index = parseInt( nodes[0], 10 );
1109 if ( index < replacements.length ) {
1110 replacement = replacements[index];
1111 } else {
1112 replacement = '$' + ( index + 1 );
1113 }
1114 return this.extlink( [ replacement, nodes[1] ] );
1115 },
1116
1117 /**
1118 * Transform parsed structure into pluralization
1119 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1120 * So convert it back with the current language's convertNumber.
1121 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1122 * @return {string} selected pluralized form according to current language
1123 */
1124 plural: function ( nodes ) {
1125 var forms, count;
1126 count = parseFloat( this.language.convertNumber( nodes[0], true ) );
1127 forms = nodes.slice( 1 );
1128 return forms.length ? this.language.convertPlural( count, forms ) : '';
1129 },
1130
1131 /**
1132 * Transform parsed structure according to gender.
1133 *
1134 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1135 *
1136 * The first node must be one of:
1137 * - the mw.user object (or a compatible one)
1138 * - an empty string - indicating the current user, same effect as passing the mw.user object
1139 * - a gender string ('male', 'female' or 'unknown')
1140 *
1141 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1142 * @return {string} Selected gender form according to current language
1143 */
1144 gender: function ( nodes ) {
1145 var gender,
1146 maybeUser = nodes[0],
1147 forms = nodes.slice( 1 );
1148
1149 if ( maybeUser === '' ) {
1150 maybeUser = mw.user;
1151 }
1152
1153 // If we are passed a mw.user-like object, check their gender.
1154 // Otherwise, assume the gender string itself was passed .
1155 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1156 gender = maybeUser.options.get( 'gender' );
1157 } else {
1158 gender = maybeUser;
1159 }
1160
1161 return this.language.gender( gender, forms );
1162 },
1163
1164 /**
1165 * Transform parsed structure into grammar conversion.
1166 * Invoked by putting `{{grammar:form|word}}` in a message
1167 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1168 * @return {string} selected grammatical form according to current language
1169 */
1170 grammar: function ( nodes ) {
1171 var form = nodes[0],
1172 word = nodes[1];
1173 return word && form && this.language.convertGrammar( word, form );
1174 },
1175
1176 /**
1177 * Tranform parsed structure into a int: (interface language) message include
1178 * Invoked by putting `{{int:othermessage}}` into a message
1179 * @param {Array} nodes List of nodes
1180 * @return {string} Other message
1181 */
1182 int: function ( nodes ) {
1183 return mw.jqueryMsg.getMessageFunction()( nodes[0].toLowerCase() );
1184 },
1185
1186 /**
1187 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1188 * and outputs it in the localized digit script and formatted with decimal
1189 * separator, according to the current language.
1190 * @param {Array} nodes List of nodes
1191 * @return {number|string} Formatted number
1192 */
1193 formatnum: function ( nodes ) {
1194 var isInteger = ( nodes[1] && nodes[1] === 'R' ) ? true : false,
1195 number = nodes[0];
1196
1197 return this.language.convertNumber( number, isInteger );
1198 }
1199 };
1200
// window.gM is deprecated: do not rely on it existing.
// It ought not to be required at all - or if required, not required here -
// but moving it to extensions breaks it (?!)
// The plugin needs to be fixed so it can do attributes as well; after that this can be removed.
// @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead'
);
1207
/**
 * jQuery plugin entry point, set to the function returned by mw.jqueryMsg.getPlugin().
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1214
// Replace the default message parser with jqueryMsg, keeping the original as a fallback
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	// Only use mw.jqueryMsg when it is required: the format is not 'plain' and the
	// message text contains '{{', '[', '<' or '>'.
	var needsJqueryMsg = this.format !== 'plain' &&
		/\{\{|[\[<>]/.test( this.map.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.call( this );
	}

	// TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
	// Caching is somewhat problematic, because we do need different message functions for different maps, so
	// we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
	return mw.jqueryMsg.getMessageFunction( {
		messages: this.map,
		// For format 'escaped', escaping part is handled by mediawiki.js
		format: this.format
	} )( this.key, this.parameters );
};
1236
1237 }( mediaWiki, jQuery ) );