Export allowed tags from Sanitizer to mediawiki.jqueryMsg
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
14 var oldParser,
15 slice = Array.prototype.slice,
16 parserDefaults = {
17 magic: {
18 'SITENAME': mw.config.get( 'wgSiteName' )
19 },
20 // This is a whitelist like Sanitizer.php.
21 // Self-closing tags are not currently supported.
22 // The simplified default here is overridden below by data supplied
23 // by the mediawiki.jqueryMsg.data module.
24 allowedHtmlElements: [
25 'b',
26 'i'
27 ],
28 // Key tag name, value allowed attributes for that tag.
29 // See Sanitizer::setupAttributeWhitelist
30 allowedHtmlCommonAttributes: [
31 // HTML
32 'id',
33 'class',
34 'style',
35 'lang',
36 'dir',
37 'title',
38
39 // WAI-ARIA
40 'role'
41 ],
42
43 // Attributes allowed for specific elements.
44 // Key is element name in lower case
45 // Value is array of allowed attributes for that element
46 allowedHtmlAttributesByElement: {},
47 messages: mw.messages,
48 language: mw.language,
49
50 // Same meaning as in mediawiki.js.
51 //
52 // Only 'text', 'parse', and 'escaped' are supported, and the
53 // actual escaping for 'escaped' is done by other code (generally
54 // through mediawiki.js).
55 //
56 // However, note that this default only
57 // applies to direct calls to jqueryMsg. The default for mediawiki.js itself
58 // is 'text', including when it uses jqueryMsg.
59 format: 'parse'
60
61 };
62
/**
 * Append children to $parent without letting jQuery interpret strings as HTML.
 *
 * Every child whose typeof is not 'object' (strings, numbers, ...) is wrapped in a TextNode
 * first, so append will not convert what it detects as an htmlString to an element.
 * Object children (jQuery, HTMLElement, TextNode, etc.) are passed through as is.
 *
 * When an array is given it is read only; the converted nodes are collected in a new array
 * so the caller's array keeps its original entries.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} Result of $parent.append( ... )
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		nodes = [];

	if ( !$.isArray( children ) ) {
		children = [ children ];
	}

	for ( i = 0, len = children.length; i < len; i++ ) {
		child = children[i];
		// Only non-objects need protecting from HTML parsing
		nodes.push( typeof child !== 'object' ? document.createTextNode( child ) : child );
	}

	return $parent.append( nodes );
}
89
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape.
 *
 * Handles exactly five entities: &#039; &quot; &lt; &gt; and &amp;.
 * Any other entity is left untouched.
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var charByEntity = {
		'&#039;': '\'',
		'&quot;': '"',
		'&lt;': '<',
		'&gt;': '>',
		'&amp;': '&'
	};

	return encoded.replace( /&(?:#039|quot|lt|gt|amp);/g, function ( entity ) {
		return charByEntity[ entity ];
	} );
}
105
/**
 * Given parser options, return a function that parses a key and replacements, returning jQuery object
 *
 * The returned function tries to parse the message. If the parser throws, the error is reported
 * through mw.log.warn and the raw message (as returned by the parser's message store for the key)
 * is returned as the text of a new span, so script execution is not halted.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	var parser = new mw.jqueryMsg.parser( options );

	return function ( args ) {
		var rawMessage, replacements,
			key = args[0];

		// Replacements arrive either as one array or variadically after the key
		if ( $.isArray( args[1] ) ) {
			replacements = args[1];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, replacements );
		} catch ( err ) {
			rawMessage = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + err.message );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
134
// If the mediawiki.jqueryMsg.data module supplied a list of allowed HTML
// elements, let it replace the simplified built-in default.
if ( mw.jqueryMsg && mw.jqueryMsg.data && mw.jqueryMsg.data.allowedHtmlElements ) {
	parserDefaults.allowedHtmlElements = mw.jqueryMsg.data.allowedHtmlElements;
}

// Start the public namespace from a fresh object. Anything previously attached
// to mw.jqueryMsg (including the data object read above) is replaced here.
mw.jqueryMsg = {};
144
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The format is taken from options.format when that is defined, otherwise from the parser
 * defaults. For 'text' and 'escaped' the result's .text() is returned, otherwise its .html().
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn = getFailableParserFn( options ),
		formatGiven = options && options.format !== undefined,
		format = formatGiven ? options.format : parserDefaults.format,
		wantsText = ( format === 'text' || format === 'escaped' );

	return function () {
		var $rendered = failableParserFn( arguments );
		return wantsText ? $rendered.text() : $rendered.html();
	};
};
185
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *       $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *       var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *       $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin empties 'this' (in a jQuery plugin context, the selected elements) and then
 * appends the parsed contents to it.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn = getFailableParserFn( options );

	return function () {
		var i, len, $contents,
			$target = this.empty();

		// TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
		// or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
		$contents = failableParserFn( arguments ).contents();
		for ( i = 0, len = $contents.length; i < len; i++ ) {
			appendWithoutParsing( $target, $contents[i] );
		}
		return $target;
	};
};
221
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * Settings are the parser defaults overlaid with the given options. The derived setting
 * onlyCurlyBraceTransform is true for the 'text' and 'escaped' formats.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
236
237 mw.jqueryMsg.parser.prototype = {
238 /**
239 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
240 *
241 * In most cases, the message is a string so this is identical.
242 * (This is why we would like to move this functionality server-side).
243 *
244 * The two parts of the key are separated by colon. For example:
245 *
246 * "message-key:true": ast
247 *
248 * if the key is "message-key" and onlyCurlyBraceTransform is true.
249 *
250 * This cache is shared by all instances of mw.jqueryMsg.parser.
251 *
252 * NOTE: We promise, it's static - when you create this empty object
253 * in the prototype, each new instance of the class gets a reference
254 * to the same object.
255 *
256 * @static
257 * @property {Object}
258 */
259 astCache: {},
260
261 /**
262 * Where the magic happens.
263 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
264 * If an error is thrown, returns original key, and logs the error
265 * @param {string} key Message key.
266 * @param {Array} replacements Variable replacements for $1, $2... $n
267 * @return {jQuery}
268 */
269 parse: function ( key, replacements ) {
270 return this.emitter.emit( this.getAst( key ), replacements );
271 },
272
273 /**
274 * Fetch the message string associated with a key, return parsed structure. Memoized.
275 * Note that we pass '[' + key + ']' back for a missing message here.
276 * @param {string} key
277 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
278 */
279 getAst: function ( key ) {
280 var wikiText,
281 cacheKey = [key, this.settings.onlyCurlyBraceTransform].join( ':' );
282
283 if ( this.astCache[ cacheKey ] === undefined ) {
284 wikiText = this.settings.messages.get( key );
285 if ( typeof wikiText !== 'string' ) {
286 wikiText = '\\[' + key + '\\]';
287 }
288 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
289 }
290 return this.astCache[ cacheKey ];
291 },
292
293 /**
294 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
295 *
296 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
297 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
298 *
299 * @param {string} input Message string wikitext
300 * @throws Error
301 * @return {Mixed} abstract syntax tree
302 */
303 wikiTextToAst: function ( input ) {
304 var pos,
305 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
306 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
307 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
308 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
309 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
310 openExtlink, closeExtlink, wikilinkPage, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
311 templateContents, openTemplate, closeTemplate,
312 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
313 settings = this.settings,
314 concat = Array.prototype.concat;
315
316 // Indicates current position in input as we parse through it.
317 // Shared among all parsing functions below.
318 pos = 0;
319
320 // =========================================================
321 // parsing combinators - could be a library on its own
322 // =========================================================
323
/**
 * Build a parser that tries each given parser in order and yields the first
 * result that is not null. If every parser yields null, so does this one.
 * @private
 * @param {Function[]} ps
 * @return {Function} Parser returning the first non-null result, or null
 */
function choice( ps ) {
	return function () {
		var outcome = null,
			index = 0;
		while ( outcome === null && index < ps.length ) {
			outcome = ps[ index ]();
			index++;
		}
		return outcome;
	};
}
342
/**
 * Try several ps in a row, all must succeed or return null.
 * On failure the shared position is rewound to where it was on entry.
 * This is the only eager one: it runs immediately instead of returning a parser.
 * @private
 * @param {Function[]} ps
 * @return {Array|null} Results of all parsers in order, or null if any of them failed
 */
function sequence( ps ) {
	var step, parsed,
		startPos = pos,
		collected = [];

	for ( step = 0; step < ps.length; step++ ) {
		parsed = ps[ step ]();
		if ( parsed === null ) {
			// Undo whatever the earlier parsers consumed
			pos = startPos;
			return null;
		}
		collected.push( parsed );
	}
	return collected;
}
364
/**
 * Build a parser that runs p over and over until it fails.
 * p must succeed a minimum of n times; otherwise the shared position is
 * rewound and the built parser returns null.
 * @private
 * @param {number} n
 * @param {Function} p
 * @return {Function} Parser returning an array of p's results, or null
 */
function nOrMore( n, p ) {
	return function () {
		var parsed,
			startPos = pos,
			results = [];

		for ( parsed = p(); parsed !== null; parsed = p() ) {
			results.push( parsed );
		}

		if ( results.length < n ) {
			pos = startPos;
			return null;
		}
		return results;
	};
}
389
/**
 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser to run
 * @param {Function} fn Applied to p's result when that result is not null
 * @return {Function} Parser returning fn's result, or null
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();
		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
407
/**
 * Make a parser that matches one fixed string at the current position.
 * On a match the shared position advances past the string.
 * @private
 * @param {string} s
 * @return {Function}
 * @return {string|null} return.return The string s, or null if the input does not continue with s
 */
function makeStringParser( s ) {
	var len = s.length;
	return function () {
		if ( input.substr( pos, len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
426
/**
 * Makes a regex parser, given a RegExp object.
 * The regex is matched against the rest of the input, starting at the current
 * position, so it should start with a ^ to anchor it there.
 * On a match the shared position advances by the length of the matched text.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 */
function makeRegexParser( regex ) {
	return function () {
		var matched,
			matches = input.slice( pos ).match( regex );

		if ( matches === null ) {
			return null;
		}
		matched = matches[0];
		pos += matched.length;
		return matched;
	};
}
446
447 // ===================================================================
448 // General patterns above this line -- wikitext specific parsers below
449 // ===================================================================
450
451 // Parsing functions follow. All parsing functions work like this:
452 // They don't accept any arguments.
453 // Instead, they just operate non destructively on the string 'input'
454 // As they can consume parts of the string, they advance the shared variable pos,
455 // and return tokens (or whatever else they want to return).
456 // some things are defined as closures and other things as ordinary functions
457 // converting everything to a closure makes it a lot harder to debug... errors pop up
458 // but some debuggers can't tell you exactly where they come from. Also the mutually
459 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
460 // This may be because, to save code, memoization was removed
461
462 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
463 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
464 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
465 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
466
467 backslash = makeStringParser( '\\' );
468 doubleQuote = makeStringParser( '"' );
469 singleQuote = makeStringParser( '\'' );
470 anyCharacter = makeRegexParser( /^./ );
471
472 openHtmlStartTag = makeStringParser( '<' );
473 optionalForwardSlash = makeRegexParser( /^\/?/ );
474 openHtmlEndTag = makeStringParser( '</' );
475 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
476 closeHtmlTag = makeRegexParser( /^\s*>/ );
477
// A backslash followed by any single character; yields that character
// without the backslash.
function escapedLiteral() {
	var parts = sequence( [ backslash, anyCharacter ] );
	if ( parts === null ) {
		return null;
	}
	return parts[1];
}
485 escapedOrLiteralWithoutSpace = choice( [
486 escapedLiteral,
487 regularLiteralWithoutSpace
488 ] );
489 escapedOrLiteralWithoutBar = choice( [
490 escapedLiteral,
491 regularLiteralWithoutBar
492 ] );
493 escapedOrRegularLiteral = choice( [
494 escapedLiteral,
495 regularLiteral
496 ] );
// Used to define "literals" without spaces, in space-delimited situations.
// One or more escaped or regular non-space characters, joined into one string.
function literalWithoutSpace() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter
function literalWithoutBar() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
508
// Used for wikilink page names. Like literalWithoutBar, but
// without allowing escapes.
function unescapedLiteralWithoutBar() {
	var chars = nOrMore( 1, regularLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
515
// One or more escaped or regular literal characters, joined into one string.
function literal() {
	var chars = nOrMore( 1, escapedOrRegularLiteral )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
520
// Literal text for the curly-brace-only mode: one or more characters accepted
// by regularLiteralWithSquareBrackets, joined into one string.
function curlyBraceTransformExpressionLiteral() {
	var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
525
// One or more ASCII letters (used for HTML tag and attribute names).
// The regex must be anchored with ^ like every other regex parser: the match is
// taken from the remaining input and only its length is added to pos, so an
// unanchored pattern could match letters further ahead and skip over the
// characters in between.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
// Attribute value bodies: everything up to (not including) the closing quote; may be empty
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
529
530 whitespace = makeRegexParser( /^\s+/ );
531 dollar = makeStringParser( '$' );
532 digits = makeRegexParser( /^\d+/ );
533
// A "$" followed by digits, e.g. $1. Yields a REPLACE node holding the
// zero-based parameter index ($1 becomes 0).
function replacement() {
	var parts = sequence( [ dollar, digits ] );
	return parts === null ?
		null :
		[ 'REPLACE', parseInt( parts[1], 10 ) - 1 ];
}
544 openExtlink = makeStringParser( '[' );
545 closeExtlink = makeStringParser( ']' );
// this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
// Yields [ 'EXTLINK', target, contents ].
function extlink() {
	var target, contentNodes,
		parsed = sequence( [
			openExtlink,
			nonWhitespaceExpression,
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parsed === null ) {
		return null;
	}

	target = parsed[1];
	contentNodes = parsed[3];
	// TODO (mattflaschen, 2013-03-22): Clean this up if possible.
	// It's avoiding CONCAT for single nodes, so they at least don't get the htmlEmitter span.
	return [
		'EXTLINK',
		target,
		contentNodes.length === 1 ? contentNodes[0] : ['CONCAT'].concat( contentNodes )
	];
}
// this is the same as the above extlink, except that the url is being passed on as a parameter
// Yields [ 'EXTLINKPARAM', zero-based parameter index, contents ].
function extLinkParam() {
	var parts = sequence( [
		openExtlink,
		dollar,
		digits,
		whitespace,
		expression,
		closeExtlink
	] );

	return parts === null ?
		null :
		[ 'EXTLINKPARAM', parseInt( parts[2], 10 ) - 1, parts[4] ];
}
584 openWikilink = makeStringParser( '[[' );
585 closeWikilink = makeStringParser( ']]' );
586 pipe = makeStringParser( '|' );
587
// Template contents wrapped in the open/close template markers; yields the
// parsed contents only.
function template() {
	var parts = sequence( [ openTemplate, templateContents, closeTemplate ] );
	if ( parts === null ) {
		return null;
	}
	return parts[1];
}
596
597 wikilinkPage = choice( [
598 unescapedLiteralWithoutBar,
599 template
600 ] );
601
// page|anchor form of wikilink contents; yields [ page, anchor ].
function pipedWikilink() {
	var parts = sequence( [ wikilinkPage, pipe, expression ] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}
610
611 wikilinkContents = choice( [
612 pipedWikilink,
613 wikilinkPage // unpiped link
614 ] );
615
// Wikilink contents wrapped in the open/close wikilink markers; yields a
// WIKILINK node followed by the parsed contents (concatenated, so an array
// of contents is spread into the node).
function wikilink() {
	var parts = sequence( [ openWikilink, wikilinkContents, closeWikilink ] );
	if ( parts === null ) {
		return null;
	}
	return [ 'WIKILINK' ].concat( parts[1] );
}
631
// TODO: Support data- if appropriate
// An attribute value in double quotes; yields the value without the quotes.
function doubleQuotedHtmlAttributeValue() {
	var parts = sequence( [ doubleQuote, htmlDoubleQuoteAttributeValue, doubleQuote ] );
	if ( parts === null ) {
		return null;
	}
	return parts[1];
}
641
// An attribute value in single quotes; yields the value without the quotes.
function singleQuotedHtmlAttributeValue() {
	var parts = sequence( [ singleQuote, htmlSingleQuoteAttributeValue, singleQuote ] );
	if ( parts === null ) {
		return null;
	}
	return parts[1];
}
650
// Whitespace, an attribute name, an equals sign and a quoted value;
// yields [ name, value ].
function htmlAttribute() {
	var quotedValue = choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] ),
		parts = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			quotedValue
		] );

	if ( parts === null ) {
		return null;
	}
	return [parts[1], parts[3]];
}
663
/**
 * Checks if HTML is allowed
 *
 * The element is allowed when the start and end tag names match (ignoring case), the
 * name is in settings.allowedHtmlElements, and every attribute name is either in
 * settings.allowedHtmlCommonAttributes or in the per-element list for that tag.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Array} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var index, name, count;

	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();

	if ( startTagName !== endTagName ) {
		return false;
	}
	if ( $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; values are not inspected
	for ( index = 0, count = attributes.length; index < count; index += 2 ) {
		name = attributes[index];
		if ( $.inArray( name, settings.allowedHtmlCommonAttributes ) !== -1 ) {
			continue;
		}
		if ( $.inArray( name, settings.allowedHtmlAttributesByElement[startTagName] || [] ) !== -1 ) {
			continue;
		}
		return false;
	}

	return true;
}
692
// Zero or more attributes. Yields a flat HTMLATTRIBUTES node: the operation
// name followed by alternating names and values.
function htmlAttributes() {
	var index,
		pairs = nOrMore( 0, htmlAttribute )(),
		flat = ['HTMLATTRIBUTES'];

	// Un-nest attributes array due to structure of jQueryMsg operations (see emit).
	for ( index = 0; index < pairs.length; index++ ) {
		flat = flat.concat( pairs[index] );
	}
	return flat;
}
698
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// Yields one of:
// - null, if there is no start tag at the current position
// - [ 'HTMLELEMENT', tagName, attributesNode, ...contents ] for allowed HTML
// - [ 'CONCAT', startTagText, ...contents ] if no end tag follows the contents
// - [ 'CONCAT', startTagText, ...contents, endTagText ] for disallowed HTML
function html() {
	var openTag, contents, closeTag, openTagText, closeTagText,
		wrappedAttributes, openStart, openEnd, closeStart;

	// Parsed in three steps (start tag, contents, end tag), recording the
	// positions in between, so the original tag text can be reconstructed
	// if the HTML is to be treated as text.

	// 1. open through closeHtmlTag
	openStart = pos;
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openEnd = pos;
	openTagText = input.slice( openStart, openEnd );

	// 2. expression
	contents = nOrMore( 0, expression )();

	// 3. openHtmlEnd through close
	closeStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag and contents.
		return [ 'CONCAT', openTagText ].concat( contents );
	}
	closeTagText = input.slice( closeStart, pos );

	wrappedAttributes = openTag[2];
	// The first entry of the attributes node is its operation name
	if ( isAllowedHtml( openTag[1], closeTag[1], wrappedAttributes.slice( 1 ) ) ) {
		return [ 'HTMLELEMENT', openTag[1], wrappedAttributes ].concat( contents );
	}

	// HTML is not allowed, so contents will remain how
	// it was, while HTML markup at this level will be
	// treated as text
	// E.g. assuming script tags are not allowed:
	//
	// <script>[[Foo|bar]]</script>
	//
	// results in '&lt;script&gt;' and '&lt;/script&gt;'
	// (not treated as an HTML tag), surrounding a fully
	// parsed HTML link.
	//
	// Concatenate everything from the tag, flattening the contents.
	return [ 'CONCAT', openTagText ].concat( contents, closeTagText );
}
770
771 templateName = transform(
772 // see $wgLegalTitleChars
773 // not allowing : due to the need to catch "PLURAL:$1"
774 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
775 function ( result ) { return result.toString(); }
776 );
// A pipe followed by zero or more parameter expressions.
// An empty parameter yields undefined (the first element of an empty list).
function templateParam() {
	var nodes,
		parts = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );

	if ( parts === null ) {
		return null;
	}

	nodes = parts[1];
	// use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[0];
}
790
// name:$n form; yields [ name, replacement node ].
function templateWithReplacement() {
	var parts = sequence( [ templateName, colon, replacement ] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}
// name:expression form; yields [ name, parsed first parameter ].
function templateWithOutReplacement() {
	var parts = sequence( [ templateName, colon, paramExpression ] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[0], parts[2] ];
}
// name: form with nothing after the colon; yields [ name, '' ].
function templateWithOutFirstParameter() {
	var parts = sequence( [ templateName, colon ] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[0], '' ];
}
814 colon = makeStringParser( ':' );
// What may appear between the template markers. Two alternatives, tried in order:
// a name with a colon and a first parameter, or a bare name; either may be
// followed by piped parameters. Yields [ name, ...parameters ].
templateContents = choice( [
	function () {
		var head, params,
			parts = sequence( [
				// templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
				// or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
				choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
				nOrMore( 0, templateParam )
			] );

		if ( parts === null ) {
			return null;
		}
		head = parts[0];
		params = parts[1];
		return head.concat( params );
	},
	function () {
		var parts = sequence( [
			templateName,
			nOrMore( 0, templateParam )
		] );

		return parts === null ? null : [ parts[0] ].concat( parts[1] );
	}
] );
836 openTemplate = makeStringParser( '{{' );
837 closeTemplate = makeStringParser( '}}' );
838 nonWhitespaceExpression = choice( [
839 template,
840 wikilink,
841 extLinkParam,
842 extlink,
843 replacement,
844 literalWithoutSpace
845 ] );
846 paramExpression = choice( [
847 template,
848 wikilink,
849 extLinkParam,
850 extlink,
851 replacement,
852 literalWithoutBar
853 ] );
854
855 expression = choice( [
856 template,
857 wikilink,
858 extLinkParam,
859 extlink,
860 replacement,
861 html,
862 literal
863 ] );
864
865 // Used when only {{-transformation is wanted, for 'text'
866 // or 'escaped' formats
867 curlyBraceTransformExpression = choice( [
868 template,
869 replacement,
870 curlyBraceTransformExpressionLiteral
871 ] );
872
/**
 * Starts the parse
 *
 * Applies the root expression zero or more times and wraps the results in a CONCAT node.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} CONCAT node, or null if the repetition parser returned null
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();
	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
885 // everything above this point is supposed to be stateless/static, but
886 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
887 // finally let's do some actual work...
888
889 // If you add another possible rootExpression, you must update the astCache key scheme.
890 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
891
892 /*
893 * For success, the p must have gotten to the end of the input
894 * and returned a non-null.
895 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
896 */
897 if ( result === null || pos !== input.length ) {
898 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
899 }
900 return result;
901 }
902
903 };
904
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Every entry of magic becomes a method on the instance, named after the lower-cased key,
 * that returns the entry's value.
 *
 * @param {Object} language Stored as this.language
 * @param {Object} magic Map of magic word name to value
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var self = this;

	this.language = language;

	$.each( magic, function ( word, value ) {
		self[ word.toLowerCase() ] = function () {
			return value;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			emitter = this,
			type = typeof node;

		if ( type === 'string' || type === 'number' ) {
			return node;
		}

		if ( type === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
			// Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
			// The logical thing is probably to return the empty string here when we encounter undefined.
			return '';
		}

		// typeof returns object for arrays
		if ( type !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + type );
		}

		// node is an array of nodes: the operation name followed by its operands,
		// which are emitted first (depth-first)
		subnodes = $.map( node.slice( 1 ), function ( n ) {
			return emitter.emit( n, replacements );
		} );
		operation = node[0].toLowerCase();
		if ( typeof emitter[operation] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return emitter[ operation ]( subnodes, replacements );
	};
};
957
958 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
959 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
960 // If you have 'magic words' then configure the parser to have them upon creation.
961 //
962 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
963 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
964 mw.jqueryMsg.htmlEmitter.prototype = {
965 /**
966 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
967 * Must return a single node to parents -- a jQuery with synthetic span
968 * However, unwrap any other synthetic spans in our children and pass them upwards
969 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
970 * @return {jQuery}
971 */
972 concat: function ( nodes ) {
973 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
974 $.each( nodes, function ( i, node ) {
975 if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
976 $.each( node.contents(), function ( j, childNode ) {
977 appendWithoutParsing( $span, childNode );
978 } );
979 } else {
980 // Let jQuery append nodes, arrays of nodes and jQuery objects
981 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
982 appendWithoutParsing( $span, node );
983 }
984 } );
985 return $span;
986 },
987
988 /**
989 * Return escaped replacement of correct index, or string if unavailable.
990 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
991 * if the specified parameter is not found return the same string
992 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
993 *
994 * TODO: Throw error if nodes.length > 1 ?
995 *
996 * @param {Array} nodes List of one element, integer, n >= 0
997 * @param {Array} replacements List of at least n strings
998 * @return {String} replacement
999 */
1000 replace: function ( nodes, replacements ) {
1001 var index = parseInt( nodes[0], 10 );
1002
1003 if ( index < replacements.length ) {
1004 return replacements[index];
1005 } else {
1006 // index not found, fallback to displaying variable
1007 return '$' + ( index + 1 );
1008 }
1009 },
1010
1011 /**
1012 * Transform wiki-link
1013 *
1014 * TODO:
1015 * It only handles basic cases, either no pipe, or a pipe with an explicit
1016 * anchor.
1017 *
1018 * It does not attempt to handle features like the pipe trick.
1019 * However, the pipe trick should usually not be present in wikitext retrieved
1020 * from the server, since the replacement is done at save time.
1021 * It may, though, if the wikitext appears in extension-controlled content.
1022 *
1023 * @param nodes
1024 */
1025 wikilink: function ( nodes ) {
1026 var page, anchor, url;
1027
1028 page = nodes[0];
1029 url = mw.util.getUrl( page );
1030
1031 if ( nodes.length === 1 ) {
1032 // [[Some Page]] or [[Namespace:Some Page]]
1033 anchor = page;
1034 } else {
1035 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1036 anchor = nodes[1];
1037 }
1038
1039 return $( '<a>' ).attr( {
1040 title: page,
1041 href: url
1042 } ).text( anchor );
1043 },
1044
1045 /**
1046 * Converts array of HTML element key value pairs to object
1047 *
1048 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1049 * name and 2 * n + 1 the associated value
1050 * @return {Object} Object mapping attribute name to attribute value
1051 */
1052 htmlattributes: function ( nodes ) {
1053 var i, len, mapping = {};
1054 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1055 mapping[nodes[i]] = decodePrimaryHtmlEntities( nodes[i + 1] );
1056 }
1057 return mapping;
1058 },
1059
1060 /**
1061 * Handles an (already-validated) HTML element.
1062 *
1063 * @param {Array} nodes Nodes to process when creating element
1064 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1065 */
1066 htmlelement: function ( nodes ) {
1067 var tagName, attributes, contents, $element;
1068
1069 tagName = nodes.shift();
1070 attributes = nodes.shift();
1071 contents = nodes;
1072 $element = $( document.createElement( tagName ) ).attr( attributes );
1073 return appendWithoutParsing( $element, contents );
1074 },
1075
1076 /**
1077 * Transform parsed structure into external link
1078 * If the href is a jQuery object, treat it as "enclosing" the link text.
1079 *
1080 * - ... function, treat it as the click handler.
1081 * - ... string, treat it as a URI.
1082 *
1083 * TODO: throw an error if nodes.length > 2 ?
1084 *
1085 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {String}
1086 * @return {jQuery}
1087 */
1088 extlink: function ( nodes ) {
1089 var $el,
1090 arg = nodes[0],
1091 contents = nodes[1];
1092 if ( arg instanceof jQuery ) {
1093 $el = arg;
1094 } else {
1095 $el = $( '<a>' );
1096 if ( typeof arg === 'function' ) {
1097 $el.attr( 'href', '#' )
1098 .click( function ( e ) {
1099 e.preventDefault();
1100 } )
1101 .click( arg );
1102 } else {
1103 $el.attr( 'href', arg.toString() );
1104 }
1105 }
1106 return appendWithoutParsing( $el, contents );
1107 },
1108
1109 /**
1110 * This is basically use a combination of replace + external link (link with parameter
1111 * as url), but we don't want to run the regular replace here-on: inserting a
1112 * url as href-attribute of a link will automatically escape it already, so
1113 * we don't want replace to (manually) escape it as well.
1114 *
1115 * TODO: throw error if nodes.length > 1 ?
1116 *
1117 * @param {Array} nodes List of one element, integer, n >= 0
1118 * @param {Array} replacements List of at least n strings
1119 * @return {string} replacement
1120 */
1121 extlinkparam: function ( nodes, replacements ) {
1122 var replacement,
1123 index = parseInt( nodes[0], 10 );
1124 if ( index < replacements.length ) {
1125 replacement = replacements[index];
1126 } else {
1127 replacement = '$' + ( index + 1 );
1128 }
1129 return this.extlink( [ replacement, nodes[1] ] );
1130 },
1131
1132 /**
1133 * Transform parsed structure into pluralization
1134 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1135 * So convert it back with the current language's convertNumber.
1136 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1137 * @return {string} selected pluralized form according to current language
1138 */
1139 plural: function ( nodes ) {
1140 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1141 explicitPluralForms = {};
1142
1143 count = parseFloat( this.language.convertNumber( nodes[0], true ) );
1144 forms = nodes.slice( 1 );
1145 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1146 form = forms[formIndex];
1147
1148 if ( form.jquery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1149 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1150 firstChild = form.contents().get( 0 );
1151 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1152 firstChildText = firstChild.textContent;
1153 if ( /^\d+=/.test( firstChildText ) ) {
1154 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[0], 10 );
1155 // Use the digit part as key and rest of first text node and
1156 // rest of child nodes as value.
1157 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1158 explicitPluralForms[explicitPluralFormNumber] = form;
1159 forms[formIndex] = undefined;
1160 }
1161 }
1162 } else if ( /^\d+=/.test( form ) ) {
1163 // Simple explicit plural forms like 12=a dozen
1164 explicitPluralFormNumber = parseInt( form.split( /=/ )[0], 10 );
1165 explicitPluralForms[explicitPluralFormNumber] = form.slice( form.indexOf( '=' ) + 1 );
1166 forms[formIndex] = undefined;
1167 }
1168 }
1169
1170 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1171 forms = $.map( forms, function ( form ) {
1172 return form;
1173 } );
1174
1175 return this.language.convertPlural( count, forms, explicitPluralForms );
1176 },
1177
1178 /**
1179 * Transform parsed structure according to gender.
1180 *
1181 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1182 *
1183 * The first node must be one of:
1184 * - the mw.user object (or a compatible one)
1185 * - an empty string - indicating the current user, same effect as passing the mw.user object
1186 * - a gender string ('male', 'female' or 'unknown')
1187 *
1188 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1189 * @return {string} Selected gender form according to current language
1190 */
1191 gender: function ( nodes ) {
1192 var gender,
1193 maybeUser = nodes[0],
1194 forms = nodes.slice( 1 );
1195
1196 if ( maybeUser === '' ) {
1197 maybeUser = mw.user;
1198 }
1199
1200 // If we are passed a mw.user-like object, check their gender.
1201 // Otherwise, assume the gender string itself was passed .
1202 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1203 gender = maybeUser.options.get( 'gender' );
1204 } else {
1205 gender = maybeUser;
1206 }
1207
1208 return this.language.gender( gender, forms );
1209 },
1210
1211 /**
1212 * Transform parsed structure into grammar conversion.
1213 * Invoked by putting `{{grammar:form|word}}` in a message
1214 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1215 * @return {string} selected grammatical form according to current language
1216 */
1217 grammar: function ( nodes ) {
1218 var form = nodes[0],
1219 word = nodes[1];
1220 return word && form && this.language.convertGrammar( word, form );
1221 },
1222
1223 /**
1224 * Tranform parsed structure into a int: (interface language) message include
1225 * Invoked by putting `{{int:othermessage}}` into a message
1226 * @param {Array} nodes List of nodes
1227 * @return {string} Other message
1228 */
1229 'int': function ( nodes ) {
1230 return mw.jqueryMsg.getMessageFunction()( nodes[0].toLowerCase() );
1231 },
1232
1233 /**
1234 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1235 * and outputs it in the localized digit script and formatted with decimal
1236 * separator, according to the current language.
1237 * @param {Array} nodes List of nodes
1238 * @return {number|string} Formatted number
1239 */
1240 formatnum: function ( nodes ) {
1241 var isInteger = ( nodes[1] && nodes[1] === 'R' ) ? true : false,
1242 number = nodes[0];
1243
1244 return this.language.convertNumber( number, isInteger );
1245 }
1246 };
1247
// Deprecated! Don't rely on gM existing.
// window.gM ought not to be required - or if required, not required here.
// But moving it to extensions breaks it (?!)
// Need to fix plugin so it could do attributes as well, then will be okay to remove this.
// @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead.'
);

/**
 * Installed on jQuery.fn as `msg`; the function comes from mw.jqueryMsg.getPlugin().
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1261
// Replace the default message parser with jqueryMsg.
// The previous parser is kept around as the fallback for simple messages.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	// Do not use mw.jqueryMsg unless required: it is only needed when the format
	// is not 'plain' and the message text contains '{{', '[', '<' or '>'.
	// For format 'plain' the message text is not even fetched.
	var needsJqueryMsg = this.format !== 'plain' &&
		/\{\{|[\[<>]/.test( this.map.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.apply( this );
	}

	// TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
	// Caching is somewhat problematic, because we do need different message functions for different maps, so
	// we'd have to cache the parser as a member of this.map, which sounds a bit ugly.
	return mw.jqueryMsg.getMessageFunction( {
		messages: this.map,
		// For format 'escaped', escaping part is handled by mediawiki.js
		format: this.format
	} )( this.key, this.parameters );
};
1283
1284 }( mediaWiki, jQuery ) );