c82b9cbfb93e9fd8395c43d2ffe027a17db351c5
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
var oldParser;
var slice = Array.prototype.slice;
var parserDefaults = {
	// Magic words: each name maps to the constant value it is replaced with.
	magic: {
		PAGENAME: mw.config.get( 'wgPageName' ),
		PAGENAMEE: mw.util.wikiUrlencode( mw.config.get( 'wgPageName' ) ),
		SITENAME: mw.config.get( 'wgSiteName' )
	},

	// Names of the HTML elements that may appear in wikitext.
	// Self-closing tags are not currently supported.
	// Empty by default; can be filled in through mw.jqueryMsg.setParserDefaults().
	allowedHtmlElements: [],

	// Attributes that are allowed on every permitted element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Additional attributes allowed on specific elements only.
	// Key: element name in lower case.
	// Value: array of attribute names allowed for that element.
	allowedHtmlAttributesByElement: {},

	messages: mw.messages,
	language: mw.language,

	// Output format, with the same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the actual escaping
	// for 'escaped' is done by other code (generally through mediawiki.js).
	//
	// Note that this default only applies to direct calls to jqueryMsg. The default
	// for mediawiki.js itself is 'text', including when it uses jqueryMsg.
	format: 'parse'
};
59
/**
 * Wrapper around jQuery append that converts every non-object child to a TextNode, so that
 * append never interprets a string as an htmlString and builds elements from it.
 *
 * A jQuery object carrying the 'mediaWiki_htmlEmitter' class (our own synthetic wrapper) is
 * replaced by its contents, so only its children are appended to the new parent.
 *
 * Any other object child (jQuery, HTMLElement, TextNode, etc.) is appended as is.
 *
 * The children argument itself is never modified: conversions are done on a private copy,
 * so an array owned by the caller keeps its original elements.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		// Copy arrays instead of converting their elements in place.
		nodes = $.isArray( children ) ? children.slice() : [ children ];

	for ( i = 0, len = nodes.length; i < len; i++ ) {
		child = nodes[ i ];
		if ( typeof child !== 'object' ) {
			// Strings, numbers, etc. must end up as literal text, never as parsed HTML.
			child = document.createTextNode( child );
		}
		if ( child instanceof jQuery && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap our own synthetic span and pass its children upwards.
			child = child.contents();
		}
		nodes[ i ] = child;
	}

	return $parent.append( nodes );
}
92
/**
 * Decode the five primary HTML entities (the ones produced by mw.html.escape).
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var i,
		decoded = encoded,
		// Applied in order. '&amp;' goes last so that text such as '&amp;lt;'
		// is decoded exactly once (to '&lt;') rather than twice.
		substitutions = [
			[ /&#039;/g, '\'' ],
			[ /&quot;/g, '"' ],
			[ /&lt;/g, '<' ],
			[ /&gt;/g, '>' ],
			[ /&amp;/g, '&' ]
		];

	for ( i = 0; i < substitutions.length; i++ ) {
		decoded = decoded.replace( substitutions[ i ][ 0 ], substitutions[ i ][ 1 ] );
	}
	return decoded;
}
108
/**
 * Convert the input to a string. A jQuery object contributes its text content;
 * anything else is passed through String().
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof jQuery ? input.text() : input );
}
122
/**
 * Build a parse function that does not throw when parser.parse() fails.
 *
 * The returned function takes an array-like `args`: its first element is the message key;
 * the replacements are either an array in the second element or all remaining elements.
 * A new parser is constructed from `options` on every call.
 *
 * When parsing succeeds, the parser's result is returned. When parser.parse() throws, the
 * error is logged through mw.log.warn, reported through mw.track under the topic
 * 'mediawiki.jqueryMsg.error', and a <span> whose text is the stored message for the key
 * is returned instead, so the problem shows up in the interface without halting the script.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function} Function taking `args` as described above and returning a jQuery object
 */
function getFailableParserFn( options ) {
	return function ( args ) {
		var replacements, rawMessage,
			// eslint-disable-next-line new-cap
			parser = new mw.jqueryMsg.parser( options ),
			key = args[ 0 ];

		if ( $.isArray( args[ 1 ] ) ) {
			replacements = args[ 1 ];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, replacements );
		} catch ( err ) {
			rawMessage = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + err.message );
			mw.track( 'mediawiki.jqueryMsg.error', {
				messageKey: key,
				errorMessage: err.message
			} );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
156
157 mw.jqueryMsg = {};
158
/**
 * Override parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. To override this data for individual
 * parsers, pass the relevant options to mw.jqueryMsg.parser instead.
 *
 * The merge is shallow: each top-level key present in `data` replaces the
 * current default of the same name.
 *
 * @private
 * @param {Object} data Defaults to merge into the current ones
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	$.extend( parserDefaults, data );
};
172
/**
 * Get the current parser defaults.
 *
 * Primarily used for the unit test. The returned object is a shallow copy: adding or
 * replacing its top-level keys does not affect the defaults, but nested objects and
 * arrays are shared with them.
 *
 * @private
 * @return {Object}
 */
mw.jqueryMsg.getParserDefaults = function () {
	var snapshot = {};
	$.extend( snapshot, parserDefaults );
	return snapshot;
};
184
/**
 * Returns a function suitable for use as a global, to construct strings from the message key
 * (and optional replacements), e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you
 * want to preserve bindings use the jQuery plugin version instead. This is only included for
 * backwards compatibility with gM().
 *
 * The output format is taken from options.format when that is defined, otherwise from the
 * parser defaults, and is fixed at the time this factory is called. For 'text' and 'escaped'
 * the result's text is returned; for any other format its inner HTML.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered text or HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var parseOrFallback,
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format,
		wantsPlainText = ( format === 'text' || format === 'escaped' );

	return function () {
		var $rendered;

		// The failable parser function is only built on first use.
		if ( !parseOrFallback ) {
			parseOrFallback = getFailableParserFn( options );
		}
		$rendered = parseOrFallback( arguments );
		return wantsPlainText ? $rendered.text() : $rendered.html();
	};
};
228
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements
 * optionally, and puts the resulting nodes into the current selection. Bindings to passed-in
 * jQuery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *        $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *        var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *        $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin works on 'this', which in a jQuery plugin context is the selected elements:
 * they are emptied first and the parsed message is then appended to them.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var parseOrFallback;

	return function () {
		var $selection;

		// The failable parser function is only built on first use.
		if ( !parseOrFallback ) {
			parseOrFallback = getFailableParserFn( options );
		}
		$selection = this.empty();
		appendWithoutParsing( $selection, parseOrFallback( arguments ) );
		return $selection;
	};
};
264
/**
 * The parser itself.
 * Describes an object whose primary duty is to .parse() message keys.
 *
 * The effective settings are the parser defaults overlaid with `options`. For the 'text'
 * and 'escaped' formats the parser is restricted to the curly-brace transform
 * (settings.onlyCurlyBraceTransform is set to true).
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	// Parsed syntax trees, keyed by message key (see getAst).
	this.astCache = {};

	// eslint-disable-next-line new-cap
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
281
282 mw.jqueryMsg.parser.prototype = {
283 /**
284 * Where the magic happens.
285 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
286 * If an error is thrown, returns original key, and logs the error
287 *
288 * @param {string} key Message key.
289 * @param {Array} replacements Variable replacements for $1, $2... $n
290 * @return {jQuery}
291 */
292 parse: function ( key, replacements ) {
293 var ast = this.getAst( key );
294 return this.emitter.emit( ast, replacements );
295 },
296
297 /**
298 * Fetch the message string associated with a key, return parsed structure. Memoized.
299 * Note that we pass '⧼' + key + '⧽' back for a missing message here.
300 *
301 * @param {string} key
302 * @return {string|Array} string of '⧼key⧽' if message missing, simple string if possible, array of arrays if needs parsing
303 */
304 getAst: function ( key ) {
305 var wikiText;
306
307 if ( !this.astCache.hasOwnProperty( key ) ) {
308 wikiText = this.settings.messages.get( key );
309 if ( typeof wikiText !== 'string' ) {
310 wikiText = '⧼' + key + '⧽';
311 }
312 this.astCache[ key ] = this.wikiTextToAst( wikiText );
313 }
314 return this.astCache[ key ];
315 },
316
317 /**
318 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
319 *
320 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
321 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
322 *
323 * @param {string} input Message string wikitext
324 * @throws Error
325 * @return {Mixed} abstract syntax tree
326 */
327 wikiTextToAst: function ( input ) {
328 var pos,
329 regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
330 doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
331 escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
332 whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
333 htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
334 openExtlink, closeExtlink, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
335 templateContents, openTemplate, closeTemplate,
336 nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
337 settings = this.settings,
338 concat = Array.prototype.concat;
339
340 // Indicates current position in input as we parse through it.
341 // Shared among all parsing functions below.
342 pos = 0;
343
344 // =========================================================
345 // parsing combinators - could be a library on its own
346 // =========================================================
347
/**
 * Build a parser that tries the given parsers in order and yields the first
 * non-null result; it yields null when none of them succeeds.
 *
 * @private
 * @param {Function[]} ps Alternative parsers, in order of preference
 * @return {Function} Parser function
 * @return {Mixed|null} return.return Result of the first succeeding parser, or null
 */
function choice( ps ) {
	return function () {
		var outcome,
			index = 0;
		while ( index < ps.length ) {
			outcome = ps[ index ]();
			if ( outcome !== null ) {
				return outcome;
			}
			index++;
		}
		return null;
	};
}
367
/**
 * Run several parsers in a row; all must succeed. On the first failure the shared
 * position is restored to where it was on entry and null is returned.
 * This is the only eager combinator: it runs immediately rather than returning a parser.
 *
 * @private
 * @param {Function[]} ps Parsers to run, in order
 * @return {Array|null} Results of all parsers in order, or null if any of them failed
 */
function sequence( ps ) {
	var index, piece,
		startPos = pos,
		collected = [];
	for ( index = 0; index < ps.length; index++ ) {
		piece = ps[ index ]();
		if ( piece === null ) {
			// Backtrack: undo whatever the earlier parsers consumed.
			pos = startPos;
			return null;
		}
		collected.push( piece );
	}
	return collected;
}
390
/**
 * Build a parser that applies the same parser repeatedly until it fails.
 * It must succeed at least n times; otherwise the shared position is restored
 * and null is yielded.
 *
 * @private
 * @param {number} n Minimum number of successful applications
 * @param {Function} p Parser to repeat
 * @return {Function} Parser function
 * @return {Array|null} return.return Collected results, or null if fewer than n were found
 */
function nOrMore( n, p ) {
	return function () {
		var match,
			startPos = pos,
			matches = [];
		for ( match = p(); match !== null; match = p() ) {
			matches.push( match );
		}
		if ( matches.length >= n ) {
			return matches;
		}
		pos = startPos;
		return null;
	};
}
416
/**
 * General pattern: parse a thing; if that worked, apply a transform to the result,
 * otherwise yield null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser
 * @param {Function} fn Transform applied to a non-null parse result
 * @return {Function} Parser function
 * @return {Mixed|null} return.return Transformed result, or null if p failed
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();
		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
434
/**
 * Make a parser that matches one fixed string at the current position.
 *
 * On a match the shared position advances by the length of the string and the
 * string is returned; otherwise the position is left alone and null is returned.
 *
 * @private
 * @param {string} s String to match
 * @return {Function} Parser function
 * @return {string|null} return.return The matched string, or null
 */
function makeStringParser( s ) {
	var len = s.length;
	return function () {
		// slice() rather than the deprecated substr(); pos is never negative,
		// so both select the same characters.
		if ( input.slice( pos, pos + len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
454
/**
 * Make a parser out of a RegExp object.
 * The regex is matched against the input from the current position onwards, so it
 * should start with a ^ to anchor it to that position.
 *
 * On a match the shared position advances by the length of the matched text.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 * @return {string|null} return.return The matched text, or null
 */
function makeRegexParser( regex ) {
	return function () {
		var found = input.slice( pos ).match( regex ),
			text;
		if ( found === null ) {
			return null;
		}
		text = found[ 0 ];
		pos += text.length;
		return text;
	};
}
474
475 // ===================================================================
476 // General patterns above this line -- wikitext specific parsers below
477 // ===================================================================
478
479 // Parsing functions follow. All parsing functions work like this:
480 // They don't accept any arguments.
481 // Instead, they just operate non destructively on the string 'input'
482 // As they can consume parts of the string, they advance the shared variable pos,
483 // and return tokens (or whatever else they want to return).
484 // some things are defined as closures and other things as ordinary functions
485 // converting everything to a closure makes it a lot harder to debug... errors pop up
486 // but some debuggers can't tell you exactly where they come from. Also the mutually
487 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
488 // This may be because, to save code, memoization was removed
489
490 regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
491 regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
492 regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
493 regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
494
495 backslash = makeStringParser( '\\' );
496 doubleQuote = makeStringParser( '"' );
497 singleQuote = makeStringParser( '\'' );
498 anyCharacter = makeRegexParser( /^./ );
499
500 openHtmlStartTag = makeStringParser( '<' );
501 optionalForwardSlash = makeRegexParser( /^\/?/ );
502 openHtmlEndTag = makeStringParser( '</' );
503 htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
504 closeHtmlTag = makeRegexParser( /^\s*>/ );
505
// A backslash followed by any single character; yields that character without the backslash.
function escapedLiteral() {
	var pair = sequence( [
		backslash,
		anyCharacter
	] );
	if ( pair === null ) {
		return null;
	}
	return pair[ 1 ];
}
513 escapedOrLiteralWithoutSpace = choice( [
514 escapedLiteral,
515 regularLiteralWithoutSpace
516 ] );
517 escapedOrLiteralWithoutBar = choice( [
518 escapedLiteral,
519 regularLiteralWithoutBar
520 ] );
521 escapedOrRegularLiteral = choice( [
522 escapedLiteral,
523 regularLiteral
524 ] );
// Literal text for space-delimited situations: one or more escaped or plain
// characters excluding whitespace, joined into one string.
function literalWithoutSpace() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Literal text within template parameters. The pipe character is the parameter
// delimiter, so by default it is not a literal in the parameter.
function literalWithoutBar() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
536
// Ordinary literal text: one or more escaped or regular characters, joined into one string.
function literal() {
	var chars = nOrMore( 1, escapedOrRegularLiteral )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
541
// Literal text for curly-brace-only parsing: unlike literal(), square brackets
// count as ordinary characters here.
function curlyBraceTransformExpressionLiteral() {
	var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();
	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
546
547 asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
548 htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
549 htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
550
551 whitespace = makeRegexParser( /^\s+/ );
552 dollar = makeStringParser( '$' );
553 digits = makeRegexParser( /^\d+/ );
554
// A placeholder such as $1. Yields [ 'REPLACE', index ], where index is the
// placeholder number minus one (so $1 becomes index 0).
function replacement() {
	var parts = sequence( [
		dollar,
		digits
	] );
	return parts === null ?
		null :
		[ 'REPLACE', parseInt( parts[ 1 ], 10 ) - 1 ];
}
565 openExtlink = makeStringParser( '[' );
566 closeExtlink = makeStringParser( ']' );
// External link: [target label]. The label is mandatory, e.g. [foo] is not allowed;
// [foo bar], [foo <i>bar</i>], etc. are allowed.
// Yields [ 'EXTLINK', target, [ 'CONCAT', ...label nodes ] ].
function extlink() {
	var targetNodes, target,
		parts = sequence( [
			openExtlink,
			nOrMore( 1, nonWhitespaceExpression ),
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parts === null ) {
		return null;
	}

	// When the entire link target is a single parameter, we can't use CONCAT, as we allow
	// passing fancy parameters (like a whole jQuery object or a function) to use for the
	// link. Only the single-node case is special-cased, since for any other single node
	// CONCAT or no CONCAT has the same effect.
	targetNodes = parts[ 1 ];
	if ( targetNodes.length === 1 ) {
		target = targetNodes[ 0 ];
	} else {
		target = [ 'CONCAT' ].concat( targetNodes );
	}

	return [
		'EXTLINK',
		target,
		[ 'CONCAT' ].concat( parts[ 3 ] )
	];
}
594 openWikilink = makeStringParser( '[[' );
595 closeWikilink = makeStringParser( ']]' );
596 pipe = makeStringParser( '|' );
597
// A template invocation: {{ contents }}. Yields the parsed contents without the braces.
function template() {
	var parts = sequence( [
		openTemplate,
		templateContents,
		closeTemplate
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
606
// Wikilink contents of the form "target|anchor".
// Yields a two-element array: the CONCAT node for the target and the CONCAT node for the anchor.
function pipedWikilink() {
	var parts = sequence( [
		nOrMore( 1, paramExpression ),
		pipe,
		nOrMore( 1, expression )
	] );
	if ( parts === null ) {
		return null;
	}
	return [
		[ 'CONCAT' ].concat( parts[ 0 ] ),
		[ 'CONCAT' ].concat( parts[ 2 ] )
	];
}
618
// Wikilink contents without a pipe: just the target.
// Yields a one-element array holding the CONCAT node for the target.
function unpipedWikilink() {
	var targetNodes = nOrMore( 1, paramExpression )();
	if ( targetNodes === null ) {
		return null;
	}
	return [
		[ 'CONCAT' ].concat( targetNodes )
	];
}
627
628 wikilinkContents = choice( [
629 pipedWikilink,
630 unpipedWikilink
631 ] );
632
// A wikilink: [[ contents ]]. Yields [ 'WIKILINK', ...content nodes ].
function wikilink() {
	var parts = sequence( [
		openWikilink,
		wikilinkContents,
		closeWikilink
	] );
	if ( parts === null ) {
		return null;
	}
	return [ 'WIKILINK' ].concat( parts[ 1 ] );
}
648
// TODO: Support data- if appropriate
// An attribute value in double quotes; yields the text between the quotes.
function doubleQuotedHtmlAttributeValue() {
	var parts = sequence( [
		doubleQuote,
		htmlDoubleQuoteAttributeValue,
		doubleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
658
// An attribute value in single quotes; yields the text between the quotes.
function singleQuotedHtmlAttributeValue() {
	var parts = sequence( [
		singleQuote,
		htmlSingleQuoteAttributeValue,
		singleQuote
	] );
	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
667
// One HTML attribute: whitespace, an ASCII-letter name, '=', and a quoted value.
// Yields [ name, value ].
function htmlAttribute() {
	var quotedValue = choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] ),
		parts = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			quotedValue
		] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 1 ], parts[ 3 ] ];
}
680
/**
 * Checks whether an HTML element with the given tags and attributes is allowed.
 *
 * The element is allowed when the start and end tag names match, the tag name is listed in
 * settings.allowedHtmlElements, and every attribute name is listed either in
 * settings.allowedHtmlCommonAttributes or in settings.allowedHtmlAttributesByElement for
 * that tag. Tag names and attribute names are both compared case-insensitively (they are
 * lower-cased before the lookup), since the whitelists hold lower-case names.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {string[]} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var i, len, attributeName, elementAttributes;

	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();
	if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	elementAttributes = settings.allowedHtmlAttributesByElement[ startTagName ] || [];
	// Names sit at the even indexes; values (odd indexes) are not inspected here.
	for ( i = 0, len = attributes.length; i < len; i += 2 ) {
		// Attribute names are matched like tag names: regardless of case.
		attributeName = attributes[ i ].toLowerCase();
		if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
			$.inArray( attributeName, elementAttributes ) === -1 ) {
			return false;
		}
	}

	return true;
}
709
// Zero or more HTML attributes. Yields a flat node
// [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ]: the [ name, value ] pairs
// are un-nested due to the structure of jQueryMsg operations (see emit).
function htmlAttributes() {
	var i,
		pairs = nOrMore( 0, htmlAttribute )(),
		node = [ 'HTMLATTRIBUTES' ];
	for ( i = 0; i < pairs.length; i++ ) {
		node.push( pairs[ i ][ 0 ], pairs[ i ][ 1 ] );
	}
	return node;
}
715
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// The element is parsed in three steps (start tag, contents, end tag) and the input
// positions around both tags are recorded, so that the original tag text can be
// reproduced verbatim whenever the markup has to be treated as plain text.
//
// Yields:
// - null when there is no start tag at the current position;
// - [ 'CONCAT', startTagText, ...contents ] when no end tag follows the contents;
// - [ 'HTMLELEMENT', tagName, attributesNode, ...contents ] when the element is allowed;
// - [ 'CONCAT', startTagText, ...contents, endTagText ] when it is not allowed.
function html() {
	var openTag, contents, closeTag, openTagText, closeTagText, attributesNode,
		openTagStart = pos,
		closeTagStart;

	// 1. Start tag, through its closing '>'.
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openTagText = input.slice( openTagStart, pos );

	// 2. Contents.
	contents = nOrMore( 0, expression )();

	// 3. End tag.
	closeTagStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag (as text) and the contents.
		return [ 'CONCAT', openTagText ].concat( contents );
	}
	closeTagText = input.slice( closeTagStart, pos );

	attributesNode = openTag[ 2 ];
	// The attributes node starts with its 'HTMLATTRIBUTES' marker; skip it for the check.
	if ( !isAllowedHtml( openTag[ 1 ], closeTag[ 1 ], attributesNode.slice( 1 ) ) ) {
		// HTML is not allowed, so the contents remain as parsed, while the markup at
		// this level is treated as text.
		// E.g. assuming script tags are not allowed:
		//
		// <script>[[Foo|bar]]</script>
		//
		// results in '&lt;script&gt;' and '&lt;/script&gt;' (not treated as an HTML
		// tag), surrounding a fully parsed HTML link.
		//
		// Concatenate everything from the tag, flattening the contents.
		return [ 'CONCAT', openTagText ].concat( contents, closeTagText );
	}

	return [ 'HTMLELEMENT', openTag[ 1 ], attributesNode ].concat( contents );
}
787
// <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed,
// as [ 'CONCAT', contents ]. Yields null when there is no complete nowiki section at the
// current position.
function nowiki() {
	var parts = sequence( [
		makeStringParser( '<nowiki>' ),
		// We use a greedy non-backtracking parser, so we must ensure here that we don't take
		// too much: match lazily, up to the first closing tag.
		// [\s\S] rather than '.', because '.' does not match line terminators and a nowiki
		// section spanning several lines would otherwise not be recognised at all.
		makeRegexParser( /^[\s\S]*?(?=<\/nowiki>)/ ),
		makeStringParser( '</nowiki>' )
	] );

	if ( parts === null ) {
		return null;
	}
	return [ 'CONCAT' ].concat( parts[ 1 ] );
}
806
807 templateName = transform(
808 // see $wgLegalTitleChars
809 // not allowing : due to the need to catch "PLURAL:$1"
810 makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
811 function ( result ) { return result.toString(); }
812 );
// One template parameter: a pipe followed by zero or more parameter expressions.
// Yields a CONCAT node when there are multiple nodes, otherwise the first node, raw
// (which is undefined for an empty parameter).
function templateParam() {
	var nodes,
		parts = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );
	if ( parts === null ) {
		return null;
	}
	nodes = parts[ 1 ];
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[ 0 ];
}
826
// "NAME:$n" - a template name, a colon and a placeholder as the first parameter.
// Yields [ name, replacementNode ].
function templateWithReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		replacement
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// "NAME:expression" - a template name, a colon and a single parameter expression.
// Yields [ name, expressionNode ].
function templateWithOutReplacement() {
	var parts = sequence( [
		templateName,
		colon,
		paramExpression
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// "NAME:" - a template name and a colon with nothing after it.
// Yields [ name, '' ], i.e. the first parameter is the empty string.
function templateWithOutFirstParameter() {
	var parts = sequence( [
		templateName,
		colon
	] );
	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], '' ];
}
850 colon = makeStringParser( ':' );
851 templateContents = choice( [
852 function () {
853 var res = sequence( [
854 // templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
855 // or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
856 choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
857 nOrMore( 0, templateParam )
858 ] );
859 return res === null ? null : res[ 0 ].concat( res[ 1 ] );
860 },
861 function () {
862 var res = sequence( [
863 templateName,
864 nOrMore( 0, templateParam )
865 ] );
866 if ( res === null ) {
867 return null;
868 }
869 return [ res[ 0 ] ].concat( res[ 1 ] );
870 }
871 ] );
872 openTemplate = makeStringParser( '{{' );
873 closeTemplate = makeStringParser( '}}' );
874 nonWhitespaceExpression = choice( [
875 template,
876 wikilink,
877 extlink,
878 replacement,
879 literalWithoutSpace
880 ] );
881 paramExpression = choice( [
882 template,
883 wikilink,
884 extlink,
885 replacement,
886 literalWithoutBar
887 ] );
888
889 expression = choice( [
890 template,
891 wikilink,
892 extlink,
893 replacement,
894 nowiki,
895 html,
896 literal
897 ] );
898
899 // Used when only {{-transformation is wanted, for 'text'
900 // or 'escaped' formats
901 curlyBraceTransformExpression = choice( [
902 template,
903 replacement,
904 curlyBraceTransformExpressionLiteral
905 ] );
906
/**
 * Starts the parse: applies the root expression as often as it matches and
 * wraps the resulting nodes in a CONCAT node.
 *
 * @param {Function} rootExpression Root parse function
 * @return {Array|null}
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();
	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
920 // everything above this point is supposed to be stateless/static, but
921 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
922 // finally let's do some actual work...
923
924 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
925
926 /*
927 * For success, the p must have gotten to the end of the input
928 * and returned a non-null.
929 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
930 */
931 if ( result === null || pos !== input.length ) {
932 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
933 }
934 return result;
935 }
936
937 };
938
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Every entry of `magic` is installed on the instance as a function, named after the
 * lower-cased key, that returns the entry's value.
 *
 * @param {Object} language
 * @param {Object} magic
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var emitter = this;

	this.language = language;

	$.each( magic, function ( word, value ) {
		emitter[ word.toLowerCase() ] = function () {
			return value;
		};
	} );

	/**
	 * (This method is defined here, after the magic words, and not in the prototype, to make
	 * sure it's not overwritten by any magic.)
	 * Walk the entire node structure, applying replacements and template functions when
	 * appropriate.
	 *
	 * Strings and numbers are returned unchanged and undefined becomes the empty string.
	 * For an array node, the subnodes are emitted first; then the method named after the
	 * lower-cased first element is called with the emitted subnodes and the replacements.
	 *
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @throws {Error} If the node has an unexpected type or names an unknown operation
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			self = this,
			kind = typeof node;

		if ( kind === 'string' || kind === 'number' ) {
			return node;
		}
		if ( kind === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in
			// a template) results in undefined.
			// Perhaps a more clever parser can detect this, and return the empty string?
			// Or is that useful information? The logical thing is probably to return the
			// empty string here when we encounter undefined.
			return '';
		}
		if ( kind !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + kind );
		}

		// typeof returns object for arrays: node is an array of nodes
		subnodes = $.map( node.slice( 1 ), function ( child ) {
			return self.emit( child, replacements );
		} );
		operation = node[ 0 ].toLowerCase();
		if ( typeof self[ operation ] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return self[ operation ]( subnodes, replacements );
	};
};
995
// For everything in the input that follows double-open-curly braces, there should be an equivalent
// parser function. For instance, {{PLURAL ... }} will be processed by 'plural'.
// If you have 'magic words', then configure the parser to have them upon creation.
//
// An emitter method takes the array of already-emitted subnodes and the array of replacements
// (the values that $1, $2... should translate to).
// Note: all such functions must be pure, with the exception of referring to other pure functions
// via this.language (convertPlural and so on).
1002 mw.jqueryMsg.htmlEmitter.prototype = {
1003 /**
1004 * Parsing has been applied depth-first we can assume that all nodes here are single nodes
1005 * Must return a single node to parents -- a jQuery with synthetic span
1006 * However, unwrap any other synthetic spans in our children and pass them upwards
1007 *
1008 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
1009 * @return {jQuery}
1010 */
1011 concat: function ( nodes ) {
1012 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1013 $.each( nodes, function ( i, node ) {
1014 // Let jQuery append nodes, arrays of nodes and jQuery objects
1015 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1016 appendWithoutParsing( $span, node );
1017 } );
1018 return $span;
1019 },
1020
1021 /**
1022 * Return escaped replacement of correct index, or string if unavailable.
1023 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1024 * if the specified parameter is not found return the same string
1025 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1026 *
1027 * TODO: Throw error if nodes.length > 1 ?
1028 *
1029 * @param {Array} nodes List of one element, integer, n >= 0
1030 * @param {Array} replacements List of at least n strings
1031 * @return {string} replacement
1032 */
1033 replace: function ( nodes, replacements ) {
1034 var index = parseInt( nodes[ 0 ], 10 );
1035
1036 if ( index < replacements.length ) {
1037 return replacements[ index ];
1038 } else {
1039 // index not found, fallback to displaying variable
1040 return '$' + ( index + 1 );
1041 }
1042 },
1043
1044 /**
1045 * Transform wiki-link
1046 *
1047 * TODO:
1048 * It only handles basic cases, either no pipe, or a pipe with an explicit
1049 * anchor.
1050 *
1051 * It does not attempt to handle features like the pipe trick.
1052 * However, the pipe trick should usually not be present in wikitext retrieved
1053 * from the server, since the replacement is done at save time.
1054 * It may, though, if the wikitext appears in extension-controlled content.
1055 *
1056 * @param {string[]} nodes
1057 * @return {jQuery}
1058 */
1059 wikilink: function ( nodes ) {
1060 var page, anchor, url, $el;
1061
1062 page = textify( nodes[ 0 ] );
1063 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1064 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1065 if ( page.charAt( 0 ) === ':' ) {
1066 page = page.slice( 1 );
1067 }
1068 url = mw.util.getUrl( page );
1069
1070 if ( nodes.length === 1 ) {
1071 // [[Some Page]] or [[Namespace:Some Page]]
1072 anchor = page;
1073 } else {
1074 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1075 anchor = nodes[ 1 ];
1076 }
1077
1078 $el = $( '<a>' ).attr( {
1079 title: page,
1080 href: url
1081 } );
1082 return appendWithoutParsing( $el, anchor );
1083 },
1084
1085 /**
1086 * Converts array of HTML element key value pairs to object
1087 *
1088 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1089 * name and 2 * n + 1 the associated value
1090 * @return {Object} Object mapping attribute name to attribute value
1091 */
1092 htmlattributes: function ( nodes ) {
1093 var i, len, mapping = {};
1094 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1095 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1096 }
1097 return mapping;
1098 },
1099
1100 /**
1101 * Handles an (already-validated) HTML element.
1102 *
1103 * @param {Array} nodes Nodes to process when creating element
1104 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1105 */
1106 htmlelement: function ( nodes ) {
1107 var tagName, attributes, contents, $element;
1108
1109 tagName = nodes.shift();
1110 attributes = nodes.shift();
1111 contents = nodes;
1112 $element = $( document.createElement( tagName ) ).attr( attributes );
1113 return appendWithoutParsing( $element, contents );
1114 },
1115
1116 /**
1117 * Transform parsed structure into external link.
1118 *
1119 * The "href" can be:
1120 * - a jQuery object, treat it as "enclosing" the link text.
1121 * - a function, treat it as the click handler.
1122 * - a string, or our htmlEmitter jQuery object, treat it as a URI after stringifying.
1123 *
1124 * TODO: throw an error if nodes.length > 2 ?
1125 *
1126 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1127 * @return {jQuery}
1128 */
1129 extlink: function ( nodes ) {
1130 var $el,
1131 arg = nodes[ 0 ],
1132 contents = nodes[ 1 ];
1133 if ( arg instanceof jQuery && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1134 $el = arg;
1135 } else {
1136 $el = $( '<a>' );
1137 if ( typeof arg === 'function' ) {
1138 $el.attr( {
1139 role: 'button',
1140 tabindex: 0
1141 } )
1142 .on( 'click keypress', function ( e ) {
1143 if (
1144 e.type === 'click' ||
1145 e.type === 'keypress' && e.which === 13
1146 ) {
1147 arg.call( this, e );
1148 }
1149 } );
1150 } else {
1151 $el.attr( 'href', textify( arg ) );
1152 }
1153 }
1154 return appendWithoutParsing( $el.empty(), contents );
1155 },
1156
1157 /**
1158 * Transform parsed structure into pluralization
1159 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1160 * So convert it back with the current language's convertNumber.
1161 *
1162 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1163 * @return {string} selected pluralized form according to current language
1164 */
1165 plural: function ( nodes ) {
1166 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1167 explicitPluralForms = {};
1168
1169 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1170 forms = nodes.slice( 1 );
1171 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1172 form = forms[ formIndex ];
1173
1174 if ( form instanceof jQuery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1175 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1176 firstChild = form.contents().get( 0 );
1177 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1178 firstChildText = firstChild.textContent;
1179 if ( /^\d+=/.test( firstChildText ) ) {
1180 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1181 // Use the digit part as key and rest of first text node and
1182 // rest of child nodes as value.
1183 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1184 explicitPluralForms[ explicitPluralFormNumber ] = form;
1185 forms[ formIndex ] = undefined;
1186 }
1187 }
1188 } else if ( /^\d+=/.test( form ) ) {
1189 // Simple explicit plural forms like 12=a dozen
1190 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1191 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1192 forms[ formIndex ] = undefined;
1193 }
1194 }
1195
1196 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1197 forms = $.map( forms, function ( form ) {
1198 return form;
1199 } );
1200
1201 return this.language.convertPlural( count, forms, explicitPluralForms );
1202 },
1203
1204 /**
1205 * Transform parsed structure according to gender.
1206 *
1207 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1208 *
1209 * The first node must be one of:
1210 * - the mw.user object (or a compatible one)
1211 * - an empty string - indicating the current user, same effect as passing the mw.user object
1212 * - a gender string ('male', 'female' or 'unknown')
1213 *
1214 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1215 * @return {string} Selected gender form according to current language
1216 */
1217 gender: function ( nodes ) {
1218 var gender,
1219 maybeUser = nodes[ 0 ],
1220 forms = nodes.slice( 1 );
1221
1222 if ( maybeUser === '' ) {
1223 maybeUser = mw.user;
1224 }
1225
1226 // If we are passed a mw.user-like object, check their gender.
1227 // Otherwise, assume the gender string itself was passed .
1228 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1229 gender = maybeUser.options.get( 'gender' );
1230 } else {
1231 gender = maybeUser;
1232 }
1233
1234 return this.language.gender( gender, forms );
1235 },
1236
1237 /**
1238 * Transform parsed structure into grammar conversion.
1239 * Invoked by putting `{{grammar:form|word}}` in a message
1240 *
1241 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1242 * @return {string} selected grammatical form according to current language
1243 */
1244 grammar: function ( nodes ) {
1245 var form = nodes[ 0 ],
1246 word = nodes[ 1 ];
1247 return word && form && this.language.convertGrammar( word, form );
1248 },
1249
1250 /**
1251 * Tranform parsed structure into a int: (interface language) message include
1252 * Invoked by putting `{{int:othermessage}}` into a message
1253 *
1254 * @param {Array} nodes List of nodes
1255 * @return {string} Other message
1256 */
1257 'int': function ( nodes ) {
1258 var msg = nodes[ 0 ];
1259 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1260 },
1261
1262 /**
1263 * Get localized namespace name from canonical name or namespace number.
1264 * Invoked by putting `{{ns:foo}}` into a message
1265 *
1266 * @param {Array} nodes List of nodes
1267 * @return {string} Localized namespace name
1268 */
1269 ns: function ( nodes ) {
1270 var ns = $.trim( textify( nodes[ 0 ] ) );
1271 if ( !/^\d+$/.test( ns ) ) {
1272 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1273 }
1274 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1275 return ns || '';
1276 },
1277
1278 /**
1279 * Takes an unformatted number (arab, no group separators and . as decimal separator)
1280 * and outputs it in the localized digit script and formatted with decimal
1281 * separator, according to the current language.
1282 *
1283 * @param {Array} nodes List of nodes
1284 * @return {number|string} Formatted number
1285 */
1286 formatnum: function ( nodes ) {
1287 var isInteger = !!nodes[ 1 ] && nodes[ 1 ] === 'R',
1288 number = nodes[ 0 ];
1289
1290 return this.language.convertNumber( number, isInteger );
1291 },
1292
1293 /**
1294 * Lowercase text
1295 *
1296 * @param {Array} nodes List of nodes
1297 * @return {string} The given text, all in lowercase
1298 */
1299 lc: function ( nodes ) {
1300 return textify( nodes[ 0 ] ).toLowerCase();
1301 },
1302
1303 /**
1304 * Uppercase text
1305 *
1306 * @param {Array} nodes List of nodes
1307 * @return {string} The given text, all in uppercase
1308 */
1309 uc: function ( nodes ) {
1310 return textify( nodes[ 0 ] ).toUpperCase();
1311 },
1312
1313 /**
1314 * Lowercase first letter of input, leaving the rest unchanged
1315 *
1316 * @param {Array} nodes List of nodes
1317 * @return {string} The given text, with the first character in lowercase
1318 */
1319 lcfirst: function ( nodes ) {
1320 var text = textify( nodes[ 0 ] );
1321 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1322 },
1323
1324 /**
1325 * Uppercase first letter of input, leaving the rest unchanged
1326 *
1327 * @param {Array} nodes List of nodes
1328 * @return {string} The given text, with the first character in uppercase
1329 */
1330 ucfirst: function ( nodes ) {
1331 var text = textify( nodes[ 0 ] );
1332 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1333 }
1334 };
1335
// Deprecated! Don't rely on gM existing.
// window.gM ought not to be required - or if required, not required here.
// But moving it to extensions breaks it (?!)
// Need to fix the plugin so it can do attributes as well, then it will be okay to remove this.
//
// Registers window.gM through mw.log.deprecate, backed by a message function obtained
// from mw.jqueryMsg.getMessageFunction() with default options.
// @deprecated since 1.23
mw.log.deprecate( window, 'gM', mw.jqueryMsg.getMessageFunction(), 'Use mw.message( ... ).parse() instead.' );
1342
/**
 * jQuery plugin installed as `$.fn.msg`; it is whatever mw.jqueryMsg.getPlugin()
 * returns when called without options.
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1349
// Replace the default message parser with jqueryMsg, keeping the previous
// parser around as the fallback for messages that do not need jqueryMsg.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	var format = this.format,
		messages = this.map,
		// jqueryMsg is only needed when the format is not 'plain' and the message
		// contains '{{' or one of the characters [ < > &
		needsJqueryMsg = format !== 'plain' && /\{\{|[\[<>&]/.test( messages.get( this.key ) );

	if ( !needsJqueryMsg ) {
		// Fall back to mw.msg's simple parser
		return oldParser.apply( this );
	}

	// The message function is created once per format and cached as an own
	// property of the message map itself, keyed by the format name.
	if ( !messages.hasOwnProperty( format ) ) {
		messages[ format ] = mw.jqueryMsg.getMessageFunction( {
			messages: messages,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: format
		} );
	}
	return messages[ format ]( this.key, this.parameters );
};
1367
/**
 * Parse the message to DOM nodes, rather than HTML string like #parse.
 *
 * This method is only available when jqueryMsg is loaded.
 *
 * A single detached `<div>` is shared by all calls: the message is rendered into it
 * with the `msg` jQuery plugin, and the resulting child nodes are detached and returned.
 *
 * @since 1.27
 * @method parseDom
 * @member mw.Message
 * @return {jQuery}
 */
mw.Message.prototype.parseDom = ( function () {
	var $scratch = $( '<div>' );

	return function () {
		var $rendered = $scratch.msg( this.key, this.parameters );
		return $rendered.contents().detach();
	};
}() );
1384
1385 }( mediaWiki, jQuery ) );