Merge "Add first letter data for bn collation (Standard and Traditional)"
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
// Module-level state shared by the helper functions and the parser below.
// NOTE(review): oldParser is only declared here; its use is not in the visible part of the file.
var oldParser,
	// Cached so that array-like `arguments` objects can be sliced into real arrays.
	slice = Array.prototype.slice,
	// Defaults merged into the options of every mw.jqueryMsg.parser instance.
	parserDefaults = {
		// Magic words. The htmlEmitter constructor turns each key into a lower-cased
		// emitter method that returns the value given here.
		magic: {
			PAGENAME: mw.config.get( 'wgPageName' ),
			PAGENAMEE: mw.util.wikiUrlencode( mw.config.get( 'wgPageName' ) ),
			SITENAME: mw.config.get( 'wgSiteName' )
		},
		// Whitelist for allowed HTML elements in wikitext.
		// Self-closing tags are not currently supported.
		// Can be populated via mw.jqueryMsg.setParserDefaults().
		allowedHtmlElements: [],
		// Attribute names allowed on every whitelisted element.
		// See Sanitizer::setupAttributeWhitelist
		allowedHtmlCommonAttributes: [
			// HTML
			'id',
			'class',
			'style',
			'lang',
			'dir',
			'title',

			// WAI-ARIA
			'role'
		],

		// Attributes allowed for specific elements.
		// Key is element name in lower case
		// Value is array of allowed attributes for that element
		allowedHtmlAttributesByElement: {},
		// Message store; the parser only calls its get( key ) method.
		messages: mw.messages,
		// Language object handed to the htmlEmitter.
		language: mw.language,

		// Same meaning as in mediawiki.js.
		//
		// Only 'text', 'parse', and 'escaped' are supported, and the
		// actual escaping for 'escaped' is done by other code (generally
		// through mediawiki.js).
		//
		// However, note that this default only
		// applies to direct calls to jqueryMsg. The default for mediawiki.js itself
		// is 'text', including when it uses jqueryMsg.
		format: 'parse'
	};
59
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own htmlEmitter jQuery object is given, its children will be unwrapped and appended to
 * new parent.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The `children` argument itself is never modified: a normalised copy is built and appended,
 * so an array owned by the caller keeps its original elements.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		list = $.isArray( children ) ? children : [ children ],
		nodes = [];

	for ( i = 0, len = list.length; i < len; i++ ) {
		child = list[ i ];
		if ( typeof child !== 'object' ) {
			// Strings, numbers, etc. become text nodes so jQuery never parses them as HTML.
			child = document.createTextNode( child );
		}
		if ( child instanceof jQuery && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap the synthetic span created by the emitter; only its contents are appended.
			child = child.contents();
		}
		nodes.push( child );
	}

	return $parent.append( nodes );
}
92
/**
 * Reverse the entity encoding of the five characters handled by mw.html.escape
 * (single quote, double quote, less-than, greater-than and ampersand).
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var i,
		decoded = encoded,
		// Applied in order. '&amp;' comes last so that the '&' it produces is
		// never mistaken for the start of one of the other entities.
		substitutions = [
			[ /&#039;/g, '\'' ],
			[ /&quot;/g, '"' ],
			[ /&lt;/g, '<' ],
			[ /&gt;/g, '>' ],
			[ /&amp;/g, '&' ]
		];

	for ( i = 0; i < substitutions.length; i++ ) {
		decoded = decoded.replace( substitutions[ i ][ 0 ], substitutions[ i ][ 1 ] );
	}
	return decoded;
}
108
/**
 * Coerce the input to a string. A jQuery object contributes its text content;
 * anything else goes through String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof jQuery ? input.text() : input );
}
122
/**
 * Build a parse function bound to the given parser options.
 *
 * The returned function takes an array-like `args` whose first element is the message key;
 * replacements are either an array in the second element or all remaining elements.
 * It returns what parser.parse() returns. If parsing throws, the error is reported through
 * mw.log.warn and mw.track, and the raw message (as returned by the message store) is
 * returned as the text of a new <span>, so the problem is visible in the interface without
 * halting script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	return function ( args ) {
		var key, argsArray, rawMessage,
			// A fresh parser is created for every call.
			// eslint-disable-next-line new-cap
			parser = new mw.jqueryMsg.parser( options );

		key = args[ 0 ];
		if ( $.isArray( args[ 1 ] ) ) {
			argsArray = args[ 1 ];
		} else {
			argsArray = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, argsArray );
		} catch ( e ) {
			rawMessage = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + e.message );
			mw.track( 'mediawiki.jqueryMsg.error', {
				messageKey: key,
				errorMessage: e.message
			} );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
156
157 mw.jqueryMsg = {};
158
/**
 * Merge the given values into the shared parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. The merge is shallow and modifies the
 * shared defaults in place. To override this data for individual parsers, pass the
 * relevant options to mw.jqueryMsg.parser instead.
 *
 * @private
 * @param {Object} data Keys to set on the parser defaults
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	$.extend( parserDefaults, data );
};
172
/**
 * Get the current parser defaults.
 *
 * Primarily used for the unit test. The returned object is a shallow copy: its
 * top-level keys are independent, but nested arrays and objects are still the
 * ones held by the defaults.
 *
 * @private
 * @return {Object}
 */
mw.jqueryMsg.getParserDefaults = function () {
	var snapshot = {};
	$.extend( snapshot, parserDefaults );
	return snapshot;
};
184
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The output format is fixed when this function is called: options.format if it is set,
 * otherwise the parser default. For 'text' and 'escaped' the text content of the parse
 * result is returned; for any other format its inner HTML.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn,
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format,
		wantsPlainText = ( format === 'text' || format === 'escaped' );

	return function () {
		var $rendered;

		// The parse function is created lazily, on first use.
		failableParserFn = failableParserFn || getFailableParserFn( options );
		$rendered = failableParserFn( arguments );

		return wantsPlainText ? $rendered.text() : $rendered.html();
	};
};
228
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *       $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *       var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *       $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin empties 'this' (in a jQuery plugin context, the selected elements) and then
 * appends the parsed message to it.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn;

	return function () {
		var $target;

		// The parse function is created lazily, on first use.
		failableParserFn = failableParserFn || getFailableParserFn( options );

		$target = this.empty();
		appendWithoutParsing( $target, failableParserFn( arguments ) );
		return $target;
	};
};
264
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The effective settings are the parser defaults overridden by `options`. Each instance
 * has its own AST cache and its own htmlEmitter.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	// For the plain-text formats only {{...}} templates and $n replacements are parsed.
	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	this.astCache = {};

	// eslint-disable-next-line new-cap
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
281
282 mw.jqueryMsg.parser.prototype = {
283 /**
284 * Where the magic happens.
285 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
286 * If an error is thrown, returns original key, and logs the error
287 *
288 * @param {string} key Message key.
289 * @param {Array} replacements Variable replacements for $1, $2... $n
290 * @return {jQuery}
291 */
292 parse: function ( key, replacements ) {
293 var ast = this.getAst( key );
294 return this.emitter.emit( ast, replacements );
295 },
296
297 /**
298 * Fetch the message string associated with a key, return parsed structure. Memoized.
299 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
300 *
301 * @param {string} key
302 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
303 */
304 getAst: function ( key ) {
305 var wikiText;
306
307 if ( !this.astCache.hasOwnProperty( key ) ) {
308 wikiText = this.settings.messages.get( key );
309 if ( typeof wikiText !== 'string' ) {
310 wikiText = '⧼' + key + '⧽';
311 }
312 this.astCache[ key ] = this.wikiTextToAst( wikiText );
313 }
314 return this.astCache[ key ];
315 },
316
/**
 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
 *
 * The AST is a nested array whose first element is an operation name ('CONCAT', 'REPLACE',
 * 'EXTLINK', 'WIKILINK', 'HTMLELEMENT', 'HTMLATTRIBUTES' or a template name) followed by
 * its operands.
 *
 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
 *
 * @param {string} input Message string wikitext
 * @throws Error If the whole input could not be consumed
 * @return {Mixed} abstract syntax tree
 */
wikiTextToAst: function ( input ) {
	var pos,
		regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
		doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
		escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
		whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
		htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
		openExtlink, closeExtlink, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
		templateContents, openTemplate, closeTemplate,
		nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
		settings = this.settings,
		concat = Array.prototype.concat;

	// Indicates current position in input as we parse through it.
	// Shared among all parsing functions below.
	pos = 0;

	// =========================================================
	// parsing combinators - could be a library on its own
	// =========================================================

	/**
	 * Try parsers until one works, if none work return null
	 *
	 * @private
	 * @param {Function[]} ps
	 * @return {Function} Parser returning the first non-null result of ps, or null
	 */
	function choice( ps ) {
		return function () {
			var i, result;
			for ( i = 0; i < ps.length; i++ ) {
				result = ps[ i ]();
				if ( result !== null ) {
					return result;
				}
			}
			return null;
		};
	}

	/**
	 * Try several ps in a row, all must succeed or return null.
	 * This is the only eager one: it runs the parsers immediately instead of
	 * returning a parser function.
	 *
	 * @private
	 * @param {Function[]} ps
	 * @return {Array|null} Results of all ps in order, or null if any of them failed
	 */
	function sequence( ps ) {
		var i, res,
			originalPos = pos,
			result = [];
		for ( i = 0; i < ps.length; i++ ) {
			res = ps[ i ]();
			if ( res === null ) {
				// Backtrack: undo whatever the earlier parsers in the sequence consumed.
				pos = originalPos;
				return null;
			}
			result.push( res );
		}
		return result;
	}

	/**
	 * Run the same parser over and over until it fails.
	 * Must succeed a minimum of n times or return null.
	 *
	 * @private
	 * @param {number} n
	 * @param {Function} p
	 * @return {Function} Parser returning the array of results, or null if fewer than n
	 */
	function nOrMore( n, p ) {
		return function () {
			var originalPos = pos,
				result = [],
				parsed = p();
			while ( parsed !== null ) {
				result.push( parsed );
				parsed = p();
			}
			if ( result.length < n ) {
				// Not enough matches: give back everything that was consumed.
				pos = originalPos;
				return null;
			}
			return result;
		};
	}

	/**
	 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
	 *
	 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
	 * May be some scoping issue
	 *
	 * @private
	 * @param {Function} p
	 * @param {Function} fn
	 * @return {Function} Parser returning fn( result of p ), or null if p failed
	 */
	function transform( p, fn ) {
		return function () {
			var result = p();
			return result === null ? null : fn( result );
		};
	}

	/**
	 * Just make parsers out of simpler JS builtin types
	 *
	 * @private
	 * @param {string} s
	 * @return {Function}
	 * @return {string} return.return
	 */
	function makeStringParser( s ) {
		var len = s.length;
		return function () {
			var result = null;
			// Succeeds only if the input at the current position starts with s.
			if ( input.substr( pos, len ) === s ) {
				result = s;
				pos += len;
			}
			return result;
		};
	}

	/**
	 * Makes a regex parser, given a RegExp object.
	 * The regex being passed in should start with a ^ to anchor it to the start
	 * of the string.
	 *
	 * @private
	 * @param {RegExp} regex anchored regex
	 * @return {Function} function to parse input based on the regex
	 */
	function makeRegexParser( regex ) {
		return function () {
			// The regex is matched against the remaining input only, hence the ^ anchor.
			var matches = input.slice( pos ).match( regex );
			if ( matches === null ) {
				return null;
			}
			pos += matches[ 0 ].length;
			return matches[ 0 ];
		};
	}

	// ===================================================================
	// General patterns above this line -- wikitext specific parsers below
	// ===================================================================

	// Parsing functions follow. All parsing functions work like this:
	// They don't accept any arguments.
	// Instead, they just operate non destructively on the string 'input'
	// As they can consume parts of the string, they advance the shared variable pos,
	// and return tokens (or whatever else they want to return).
	// some things are defined as closures and other things as ordinary functions
	// converting everything to a closure makes it a lot harder to debug... errors pop up
	// but some debuggers can't tell you exactly where they come from. Also the mutually
	// recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
	// This may be because, to save code, memoization was removed

	// Single-character literals; each variant excludes the characters that have a
	// special meaning in the context where it is used.
	regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
	regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
	regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
	regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );

	backslash = makeStringParser( '\\' );
	doubleQuote = makeStringParser( '"' );
	singleQuote = makeStringParser( '\'' );
	anyCharacter = makeRegexParser( /^./ );

	openHtmlStartTag = makeStringParser( '<' );
	optionalForwardSlash = makeRegexParser( /^\/?/ );
	openHtmlEndTag = makeStringParser( '</' );
	htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
	closeHtmlTag = makeRegexParser( /^\s*>/ );

	// A backslash followed by any character; yields that character without the backslash.
	function escapedLiteral() {
		var result = sequence( [
			backslash,
			anyCharacter
		] );
		return result === null ? null : result[ 1 ];
	}
	escapedOrLiteralWithoutSpace = choice( [
		escapedLiteral,
		regularLiteralWithoutSpace
	] );
	escapedOrLiteralWithoutBar = choice( [
		escapedLiteral,
		regularLiteralWithoutBar
	] );
	escapedOrRegularLiteral = choice( [
		escapedLiteral,
		regularLiteral
	] );
	// Used to define "literals" without spaces, in space-delimited situations
	function literalWithoutSpace() {
		var result = nOrMore( 1, escapedOrLiteralWithoutSpace )();
		return result === null ? null : result.join( '' );
	}
	// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
	// it is not a literal in the parameter
	function literalWithoutBar() {
		var result = nOrMore( 1, escapedOrLiteralWithoutBar )();
		return result === null ? null : result.join( '' );
	}

	// One or more ordinary or backslash-escaped characters, joined into one string.
	function literal() {
		var result = nOrMore( 1, escapedOrRegularLiteral )();
		return result === null ? null : result.join( '' );
	}

	// Literal used when only {{...}} and $n are parsed: square brackets and '<' are plain text here.
	function curlyBraceTransformExpressionLiteral() {
		var result = nOrMore( 1, regularLiteralWithSquareBrackets )();
		return result === null ? null : result.join( '' );
	}

	asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
	htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
	htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );

	whitespace = makeRegexParser( /^\s+/ );
	dollar = makeStringParser( '$' );
	digits = makeRegexParser( /^\d+/ );

	// $n placeholder. The stored index is zero-based, i.e. $1 becomes [ 'REPLACE', 0 ].
	function replacement() {
		var result = sequence( [
			dollar,
			digits
		] );
		if ( result === null ) {
			return null;
		}
		return [ 'REPLACE', parseInt( result[ 1 ], 10 ) - 1 ];
	}
	openExtlink = makeStringParser( '[' );
	closeExtlink = makeStringParser( ']' );
	// this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
	function extlink() {
		var result, parsedResult, target;
		result = null;
		// nonWhitespaceExpression and expression are assigned further down; that is fine
		// because they are only read when extlink() is actually called.
		parsedResult = sequence( [
			openExtlink,
			nOrMore( 1, nonWhitespaceExpression ),
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );
		if ( parsedResult !== null ) {
			// When the entire link target is a single parameter, we can't use CONCAT, as we allow
			// passing fancy parameters (like a whole jQuery object or a function) to use for the
			// link. Check only if it's a single match, since we can either do CONCAT or not for
			// singles with the same effect.
			target = parsedResult[ 1 ].length === 1 ?
				parsedResult[ 1 ][ 0 ] :
				[ 'CONCAT' ].concat( parsedResult[ 1 ] );
			result = [
				'EXTLINK',
				target,
				[ 'CONCAT' ].concat( parsedResult[ 3 ] )
			];
		}
		return result;
	}
	openWikilink = makeStringParser( '[[' );
	closeWikilink = makeStringParser( ']]' );
	pipe = makeStringParser( '|' );

	// {{ ... }}; yields whatever templateContents produced.
	function template() {
		var result = sequence( [
			openTemplate,
			templateContents,
			closeTemplate
		] );
		return result === null ? null : result[ 1 ];
	}

	// Contents of [[target|anchor]]: yields [ target, anchor ], each wrapped in CONCAT.
	function pipedWikilink() {
		var result = sequence( [
			nOrMore( 1, paramExpression ),
			pipe,
			nOrMore( 1, expression )
		] );
		return result === null ? null : [
			[ 'CONCAT' ].concat( result[ 0 ] ),
			[ 'CONCAT' ].concat( result[ 2 ] )
		];
	}

	// Contents of [[target]]: yields [ target ], wrapped in CONCAT.
	function unpipedWikilink() {
		var result = sequence( [
			nOrMore( 1, paramExpression )
		] );
		return result === null ? null : [
			[ 'CONCAT' ].concat( result[ 0 ] )
		];
	}

	// The piped form is tried first.
	wikilinkContents = choice( [
		pipedWikilink,
		unpipedWikilink
	] );

	// [[ ... ]]; yields [ 'WIKILINK', target ] or [ 'WIKILINK', target, anchor ].
	function wikilink() {
		var result, parsedResult, parsedLinkContents;
		result = null;

		parsedResult = sequence( [
			openWikilink,
			wikilinkContents,
			closeWikilink
		] );
		if ( parsedResult !== null ) {
			parsedLinkContents = parsedResult[ 1 ];
			result = [ 'WIKILINK' ].concat( parsedLinkContents );
		}
		return result;
	}

	// TODO: Support data- if appropriate
	// "value"; yields the value without the quotes.
	function doubleQuotedHtmlAttributeValue() {
		var parsedResult = sequence( [
			doubleQuote,
			htmlDoubleQuoteAttributeValue,
			doubleQuote
		] );
		return parsedResult === null ? null : parsedResult[ 1 ];
	}

	// 'value'; yields the value without the quotes.
	function singleQuotedHtmlAttributeValue() {
		var parsedResult = sequence( [
			singleQuote,
			htmlSingleQuoteAttributeValue,
			singleQuote
		] );
		return parsedResult === null ? null : parsedResult[ 1 ];
	}

	// Whitespace, then name=value with a quoted value; yields [ name, value ].
	function htmlAttribute() {
		var parsedResult = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			choice( [
				doubleQuotedHtmlAttributeValue,
				singleQuotedHtmlAttributeValue
			] )
		] );
		return parsedResult === null ? null : [ parsedResult[ 1 ], parsedResult[ 3 ] ];
	}

	/**
	 * Checks if HTML is allowed
	 *
	 * The tag names are compared case-insensitively; attribute names are compared as written.
	 *
	 * @param {string} startTagName HTML start tag name
	 * @param {string} endTagName HTML end tag name
	 * @param {Object} attributes array of consecutive key value pairs,
	 *  with index 2 * n being a name and 2 * n + 1 the associated value
	 * @return {boolean} true if this HTML is allowed, false otherwise
	 */
	function isAllowedHtml( startTagName, endTagName, attributes ) {
		var i, len, attributeName;

		startTagName = startTagName.toLowerCase();
		endTagName = endTagName.toLowerCase();
		if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
			return false;
		}

		// Step over the names only (even indexes); the values are not checked here.
		for ( i = 0, len = attributes.length; i < len; i += 2 ) {
			attributeName = attributes[ i ];
			if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
				$.inArray( attributeName, settings.allowedHtmlAttributesByElement[ startTagName ] || [] ) === -1 ) {
				return false;
			}
		}

		return true;
	}

	// Zero or more attributes; yields [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
	function htmlAttributes() {
		var parsedResult = nOrMore( 0, htmlAttribute )();
		// Un-nest attributes array due to structure of jQueryMsg operations (see emit).
		return concat.apply( [ 'HTMLATTRIBUTES' ], parsedResult );
	}

	// Subset of allowed HTML markup.
	// Most elements and many attributes allowed on the server are not supported yet.
	function html() {
		var parsedOpenTagResult, parsedHtmlContents, parsedCloseTagResult,
			wrappedAttributes, attributes, startTagName, endTagName, startOpenTagPos,
			startCloseTagPos, endOpenTagPos, endCloseTagPos,
			result = null;

		// Break into three sequence calls. That should allow accurate reconstruction of the original HTML, and requiring an exact tag name match.
		// 1. open through closeHtmlTag
		// 2. expression
		// 3. openHtmlEnd through close
		// This will allow recording the positions to reconstruct if HTML is to be treated as text.

		startOpenTagPos = pos;
		parsedOpenTagResult = sequence( [
			openHtmlStartTag,
			asciiAlphabetLiteral,
			htmlAttributes,
			optionalForwardSlash,
			closeHtmlTag
		] );

		if ( parsedOpenTagResult === null ) {
			return null;
		}

		endOpenTagPos = pos;
		startTagName = parsedOpenTagResult[ 1 ];

		parsedHtmlContents = nOrMore( 0, expression )();

		startCloseTagPos = pos;
		parsedCloseTagResult = sequence( [
			openHtmlEndTag,
			asciiAlphabetLiteral,
			closeHtmlTag
		] );

		if ( parsedCloseTagResult === null ) {
			// Closing tag failed. Return the start tag and contents.
			return [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
				.concat( parsedHtmlContents );
		}

		endCloseTagPos = pos;
		endTagName = parsedCloseTagResult[ 1 ];
		wrappedAttributes = parsedOpenTagResult[ 2 ];
		// Drop the leading 'HTMLATTRIBUTES' marker to get the flat name/value list.
		attributes = wrappedAttributes.slice( 1 );
		if ( isAllowedHtml( startTagName, endTagName, attributes ) ) {
			result = [ 'HTMLELEMENT', startTagName, wrappedAttributes ]
				.concat( parsedHtmlContents );
		} else {
			// HTML is not allowed, so contents will remain how
			// it was, while HTML markup at this level will be
			// treated as text
			// E.g. assuming script tags are not allowed:
			//
			// <script>[[Foo|bar]]</script>
			//
			// results in '&lt;script&gt;' and '&lt;/script&gt;'
			// (not treated as an HTML tag), surrounding a fully
			// parsed HTML link.
			//
			// Concatenate everything from the tag, flattening the contents.
			result = [ 'CONCAT', input.slice( startOpenTagPos, endOpenTagPos ) ]
				.concat( parsedHtmlContents, input.slice( startCloseTagPos, endCloseTagPos ) );
		}

		return result;
	}

	// <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed.
	function nowiki() {
		var parsedResult, plainText,
			result = null;

		parsedResult = sequence( [
			makeStringParser( '<nowiki>' ),
			// We use a greedy non-backtracking parser, so we must ensure here that we don't take too much
			makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
			makeStringParser( '</nowiki>' )
		] );
		if ( parsedResult !== null ) {
			plainText = parsedResult[ 1 ];
			result = [ 'CONCAT' ].concat( plainText );
		}

		return result;
	}

	templateName = transform(
		// see $wgLegalTitleChars
		// not allowing : due to the need to catch "PLURAL:$1"
		makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
		function ( result ) { return result.toString(); }
	);
	// A pipe followed by zero or more parameter expressions.
	function templateParam() {
		var expr, result;
		result = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );
		if ( result === null ) {
			return null;
		}
		expr = result[ 1 ];
		// use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
		// (For an empty parameter expr[ 0 ] is undefined.)
		return expr.length > 1 ? [ 'CONCAT' ].concat( expr ) : expr[ 0 ];
	}

	// NAME:$n ; yields [ name, replacement node ].
	function templateWithReplacement() {
		var result = sequence( [
			templateName,
			colon,
			replacement
		] );
		return result === null ? null : [ result[ 0 ], result[ 2 ] ];
	}
	// NAME:expression ; yields [ name, expression node ].
	function templateWithOutReplacement() {
		var result = sequence( [
			templateName,
			colon,
			paramExpression
		] );
		return result === null ? null : [ result[ 0 ], result[ 2 ] ];
	}
	// NAME: with nothing after the colon; yields [ name, '' ].
	function templateWithOutFirstParameter() {
		var result = sequence( [
			templateName,
			colon
		] );
		return result === null ? null : [ result[ 0 ], '' ];
	}
	colon = makeStringParser( ':' );
	templateContents = choice( [
		// Form 1: NAME:firstParam|param|param...
		function () {
			var res = sequence( [
				// templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
				// or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
				choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
				nOrMore( 0, templateParam )
			] );
			return res === null ? null : res[ 0 ].concat( res[ 1 ] );
		},
		// Form 2: NAME|param|param... (no colon)
		function () {
			var res = sequence( [
				templateName,
				nOrMore( 0, templateParam )
			] );
			if ( res === null ) {
				return null;
			}
			return [ res[ 0 ] ].concat( res[ 1 ] );
		}
	] );
	openTemplate = makeStringParser( '{{' );
	closeTemplate = makeStringParser( '}}' );
	// Expression that stops at whitespace (used for the target of an external link).
	nonWhitespaceExpression = choice( [
		template,
		wikilink,
		extlink,
		replacement,
		literalWithoutSpace
	] );
	// Expression that stops at a pipe (used for template parameters and wikilink targets).
	paramExpression = choice( [
		template,
		wikilink,
		extlink,
		replacement,
		literalWithoutBar
	] );

	// Full expression, used for the 'parse' format.
	expression = choice( [
		template,
		wikilink,
		extlink,
		replacement,
		nowiki,
		html,
		literal
	] );

	// Used when only {{-transformation is wanted, for 'text'
	// or 'escaped' formats
	curlyBraceTransformExpression = choice( [
		template,
		replacement,
		curlyBraceTransformExpressionLiteral
	] );

	/**
	 * Starts the parse
	 *
	 * @param {Function} rootExpression root parse function
	 * @return {Array|null} [ 'CONCAT', ...parsed nodes ]
	 */
	function start( rootExpression ) {
		var result = nOrMore( 0, rootExpression )();
		// With a minimum of 0, nOrMore as written above never yields null, so this
		// check is purely defensive.
		if ( result === null ) {
			return null;
		}
		return [ 'CONCAT' ].concat( result );
	}
	// everything above this point is supposed to be stateless/static, but
	// I am deferring the work of turning it into prototypes & objects. It's quite fast enough
	// finally let's do some actual work...

	result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );

	/*
	 * For success, the p must have gotten to the end of the input
	 * and returned a non-null.
	 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
	 */
	if ( result === null || pos !== input.length ) {
		throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
	}
	return result;
}
935
936 };
937
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * @param {Object} language Language object, stored as this.language
 * @param {Object} magic Map of magic word to value; each becomes a lower-cased method
 *  on the instance that returns the value
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var emitter = this;

	this.language = language;

	$.each( magic, function ( key, val ) {
		emitter[ key.toLowerCase() ] = function () {
			return val;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 *
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			self = this,
			type = typeof node;

		// Leaves are passed through unchanged.
		if ( type === 'string' || type === 'number' ) {
			return node;
		}

		// Parsing the empty string (as an entire expression, or as a paramExpression in a
		// template) results in undefined; emit it as the empty string.
		if ( type === 'undefined' ) {
			return '';
		}

		if ( type !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + typeof node );
		}

		// typeof reports 'object' for arrays: node is [ operation, operand, operand, ... ].
		// Operands are emitted first (depth-first), then handed to the operation's method.
		subnodes = $.map( node.slice( 1 ), function ( n ) {
			return self.emit( n, replacements );
		} );
		operation = node[ 0 ].toLowerCase();
		if ( typeof self[ operation ] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return self[ operation ]( subnodes, replacements );
	};
};
991
992 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
993 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
994 // If you have 'magic words' then configure the parser to have them upon creation.
995 //
// An emitter method takes the array of already-emitted subnodes and the array of replacements (the values that $1, $2... should translate to).
997 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
998 mw.jqueryMsg.htmlEmitter.prototype = {
999 /**
1000 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1001 * Must return a single node to parents -- a jQuery with synthetic span
1002 * However, unwrap any other synthetic spans in our children and pass them upwards
1003 *
1004 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
1005 * @return {jQuery}
1006 */
1007 concat: function ( nodes ) {
1008 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1009 $.each( nodes, function ( i, node ) {
1010 // Let jQuery append nodes, arrays of nodes and jQuery objects
1011 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1012 appendWithoutParsing( $span, node );
1013 } );
1014 return $span;
1015 },
1016
1017 /**
1018 * Return escaped replacement of correct index, or string if unavailable.
1019 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1020 * if the specified parameter is not found return the same string
1021 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1022 *
1023 * TODO: Throw error if nodes.length > 1 ?
1024 *
1025 * @param {Array} nodes List of one element, integer, n >= 0
1026 * @param {Array} replacements List of at least n strings
1027 * @return {string} replacement
1028 */
1029 replace: function ( nodes, replacements ) {
1030 var index = parseInt( nodes[ 0 ], 10 );
1031
1032 if ( index < replacements.length ) {
1033 return replacements[ index ];
1034 } else {
1035 // index not found, fallback to displaying variable
1036 return '$' + ( index + 1 );
1037 }
1038 },
1039
1040 /**
1041 * Transform wiki-link
1042 *
1043 * TODO:
1044 * It only handles basic cases, either no pipe, or a pipe with an explicit
1045 * anchor.
1046 *
1047 * It does not attempt to handle features like the pipe trick.
1048 * However, the pipe trick should usually not be present in wikitext retrieved
1049 * from the server, since the replacement is done at save time.
1050 * It may, though, if the wikitext appears in extension-controlled content.
1051 *
1052 * @param {string[]} nodes
1053 */
1054 wikilink: function ( nodes ) {
1055 var page, anchor, url, $el;
1056
1057 page = textify( nodes[ 0 ] );
1058 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1059 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1060 if ( page.charAt( 0 ) === ':' ) {
1061 page = page.slice( 1 );
1062 }
1063 url = mw.util.getUrl( page );
1064
1065 if ( nodes.length === 1 ) {
1066 // [[Some Page]] or [[Namespace:Some Page]]
1067 anchor = page;
1068 } else {
1069 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1070 anchor = nodes[ 1 ];
1071 }
1072
1073 $el = $( '<a>' ).attr( {
1074 title: page,
1075 href: url
1076 } );
1077 return appendWithoutParsing( $el, anchor );
1078 },
1079
1080 /**
1081 * Converts array of HTML element key value pairs to object
1082 *
1083 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1084 * name and 2 * n + 1 the associated value
1085 * @return {Object} Object mapping attribute name to attribute value
1086 */
1087 htmlattributes: function ( nodes ) {
1088 var i, len, mapping = {};
1089 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1090 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1091 }
1092 return mapping;
1093 },
1094
1095 /**
1096 * Handles an (already-validated) HTML element.
1097 *
1098 * @param {Array} nodes Nodes to process when creating element
1099 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1100 */
1101 htmlelement: function ( nodes ) {
1102 var tagName, attributes, contents, $element;
1103
1104 tagName = nodes.shift();
1105 attributes = nodes.shift();
1106 contents = nodes;
1107 $element = $( document.createElement( tagName ) ).attr( attributes );
1108 return appendWithoutParsing( $element, contents );
1109 },
1110
1111 /**
1112 * Transform parsed structure into external link.
1113 *
1114 * The "href" can be:
1115 * - a jQuery object, treat it as "enclosing" the link text.
1116 * - a function, treat it as the click handler.
1117 * - a string, or our htmlEmitter jQuery object, treat it as a URI after stringifying.
1118 *
1119 * TODO: throw an error if nodes.length > 2 ?
1120 *
1121 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1122 * @return {jQuery}
1123 */
1124 extlink: function ( nodes ) {
1125 var $el,
1126 arg = nodes[ 0 ],
1127 contents = nodes[ 1 ];
1128 if ( arg instanceof jQuery && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1129 $el = arg;
1130 } else {
1131 $el = $( '<a>' );
1132 if ( typeof arg === 'function' ) {
1133 $el.attr( {
1134 role: 'button',
1135 tabindex: 0
1136 } )
1137 .on( 'click keypress', function ( e ) {
1138 if (
1139 e.type === 'click' ||
1140 e.type === 'keypress' && e.which === 13
1141 ) {
1142 arg.call( this, e );
1143 }
1144 } );
1145 } else {
1146 $el.attr( 'href', textify( arg ) );
1147 }
1148 }
1149 return appendWithoutParsing( $el.empty(), contents );
1150 },
1151
1152 /**
1153 * Transform parsed structure into pluralization
1154 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1155 * So convert it back with the current language's convertNumber.
1156 *
1157 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1158 * @return {string} selected pluralized form according to current language
1159 */
1160 plural: function ( nodes ) {
1161 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1162 explicitPluralForms = {};
1163
1164 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1165 forms = nodes.slice( 1 );
1166 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1167 form = forms[ formIndex ];
1168
1169 if ( form instanceof jQuery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1170 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1171 firstChild = form.contents().get( 0 );
1172 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1173 firstChildText = firstChild.textContent;
1174 if ( /^\d+=/.test( firstChildText ) ) {
1175 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1176 // Use the digit part as key and rest of first text node and
1177 // rest of child nodes as value.
1178 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1179 explicitPluralForms[ explicitPluralFormNumber ] = form;
1180 forms[ formIndex ] = undefined;
1181 }
1182 }
1183 } else if ( /^\d+=/.test( form ) ) {
1184 // Simple explicit plural forms like 12=a dozen
1185 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1186 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1187 forms[ formIndex ] = undefined;
1188 }
1189 }
1190
1191 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1192 forms = $.map( forms, function ( form ) {
1193 return form;
1194 } );
1195
1196 return this.language.convertPlural( count, forms, explicitPluralForms );
1197 },
1198
1199 /**
1200 * Transform parsed structure according to gender.
1201 *
1202 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1203 *
1204 * The first node must be one of:
1205 * - the mw.user object (or a compatible one)
1206 * - an empty string - indicating the current user, same effect as passing the mw.user object
1207 * - a gender string ('male', 'female' or 'unknown')
1208 *
1209 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1210 * @return {string} Selected gender form according to current language
1211 */
1212 gender: function ( nodes ) {
1213 var gender,
1214 maybeUser = nodes[ 0 ],
1215 forms = nodes.slice( 1 );
1216
1217 if ( maybeUser === '' ) {
1218 maybeUser = mw.user;
1219 }
1220
1221 // If we are passed a mw.user-like object, check their gender.
1222 // Otherwise, assume the gender string itself was passed .
1223 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1224 gender = maybeUser.options.get( 'gender' );
1225 } else {
1226 gender = maybeUser;
1227 }
1228
1229 return this.language.gender( gender, forms );
1230 },
1231
1232 /**
1233 * Transform parsed structure into grammar conversion.
1234 * Invoked by putting `{{grammar:form|word}}` in a message
1235 *
1236 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1237 * @return {string} selected grammatical form according to current language
1238 */
1239 grammar: function ( nodes ) {
1240 var form = nodes[ 0 ],
1241 word = nodes[ 1 ];
1242 return word && form && this.language.convertGrammar( word, form );
1243 },
1244
1245 /**
1246 * Tranform parsed structure into a int: (interface language) message include
1247 * Invoked by putting `{{int:othermessage}}` into a message
1248 *
1249 * @param {Array} nodes List of nodes
1250 * @return {string} Other message
1251 */
1252 'int': function ( nodes ) {
1253 var msg = nodes[ 0 ];
1254 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1255 },
1256
1257 /**
1258 * Get localized namespace name from canonical name or namespace number.
1259 * Invoked by putting `{{ns:foo}}` into a message
1260 *
1261 * @param {Array} nodes List of nodes
1262 * @return {string} Localized namespace name
1263 */
1264 ns: function ( nodes ) {
1265 var ns = $.trim( textify( nodes[ 0 ] ) );
1266 if ( !/^\d+$/.test( ns ) ) {
1267 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1268 }
1269 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1270 return ns || '';
1271 },
1272
1273 /**
1274 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1275 * and outputs it in the localized digit script and formatted with decimal
1276 * separator, according to the current language.
1277 *
1278 * @param {Array} nodes List of nodes
1279 * @return {number|string} Formatted number
1280 */
1281 formatnum: function ( nodes ) {
1282 var isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1283 number = nodes[ 0 ];
1284
1285 return this.language.convertNumber( number, isInteger );
1286 },
1287
1288 /**
1289 * Lowercase text
1290 *
1291 * @param {Array} nodes List of nodes
1292 * @return {string} The given text, all in lowercase
1293 */
1294 lc: function ( nodes ) {
1295 return textify( nodes[ 0 ] ).toLowerCase();
1296 },
1297
1298 /**
1299 * Uppercase text
1300 *
1301 * @param {Array} nodes List of nodes
1302 * @return {string} The given text, all in uppercase
1303 */
1304 uc: function ( nodes ) {
1305 return textify( nodes[ 0 ] ).toUpperCase();
1306 },
1307
1308 /**
1309 * Lowercase first letter of input, leaving the rest unchanged
1310 *
1311 * @param {Array} nodes List of nodes
1312 * @return {string} The given text, with the first character in lowercase
1313 */
1314 lcfirst: function ( nodes ) {
1315 var text = textify( nodes[ 0 ] );
1316 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1317 },
1318
1319 /**
1320 * Uppercase first letter of input, leaving the rest unchanged
1321 *
1322 * @param {Array} nodes List of nodes
1323 * @return {string} The given text, with the first character in uppercase
1324 */
1325 ucfirst: function ( nodes ) {
1326 var text = textify( nodes[ 0 ] );
1327 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1328 }
1329 };
1330
// Deprecated! Do not rely on window.gM existing.
// It ought not to be required - or, if it is required, not be defined here -
// but moving it to extensions breaks it (?!)
// The plugin needs to be fixed so it can do attributes as well; after that
// it will be okay to remove this.
// @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead.'
);

/**
 * jQuery plugin created by mw.jqueryMsg#getPlugin.
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1344
// Replace the default message parser with jqueryMsg.
// The previous parser is kept and still used for format 'plain' and for messages
// whose text contains none of '{{', '[', '<', '>' or '&'.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	var messageMap = this.map,
		format = this.format,
		needsJqueryMsg = format !== 'plain' &&
			/\{\{|[\[<>&]/.test( messageMap.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.call( this );
	}

	// One message function per format is created lazily and stored on the map itself.
	if ( !messageMap.hasOwnProperty( format ) ) {
		messageMap[ format ] = mw.jqueryMsg.getMessageFunction( {
			messages: messageMap,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: format
		} );
	}
	return messageMap[ format ]( this.key, this.parameters );
};
1362
/**
 * Parse the message to DOM nodes, rather than HTML string like #parse.
 *
 * This method is only available when jqueryMsg is loaded.
 *
 * The message is rendered into a single shared, off-document `<div>` through the
 * jQuery `msg` plugin; the resulting child nodes are then detached and returned.
 *
 * @since 1.27
 * @method parseDom
 * @member mw.Message
 * @return {jQuery}
 */
mw.Message.prototype.parseDom = ( function () {
	var $scratch = $( '<div>' );

	return function () {
		var $rendered = $scratch.msg( this.key, this.parameters );

		return $rendered.contents().detach();
	};
}() );
1379
1380 }( mediaWiki, jQuery ) );