b8d0b09a178e2cc8a863f88c055122227b7e3d93
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
// Declared here; not assigned in this part of the file.
var oldParser;

// Shortcut used to turn array-likes (such as `arguments`) into real arrays.
var slice = Array.prototype.slice;

// Defaults merged into the settings of every parser (see mw.jqueryMsg.parser).
var parserDefaults = {
	// Magic words; each becomes an emitter method named after the lower-cased key.
	magic: {
		SITENAME: mw.config.get( 'wgSiteName' )
	},

	// Whitelist for allowed HTML elements in wikitext.
	// Self-closing tags are not currently supported.
	// Can be populated via setPrivateData().
	allowedHtmlElements: [],

	// Attribute names accepted on every allowed element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Attributes allowed for specific elements.
	// Key is element name in lower case
	// Value is array of allowed attributes for that element
	allowedHtmlAttributesByElement: {},

	// Message store and language helpers used by the parser and the emitter.
	messages: mw.messages,
	language: mw.language,

	// Same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the
	// actual escaping for 'escaped' is done by other code (generally
	// through mediawiki.js).
	//
	// However, note that this default only
	// applies to direct calls to jqueryMsg. The default for mediawiki.js itself
	// is 'text', including when it uses jqueryMsg.
	format: 'parse'
};
57
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * If our own htmlEmitter jQuery object (one carrying the 'mediaWiki_htmlEmitter' class) is
 * given, its contents are appended to the new parent instead of the wrapper itself.
 *
 * Other object elements of children (jQuery, HTMLElement, TextNode, etc.) are left as is.
 *
 * The `children` argument is never modified: when an array is passed, the conversion is
 * done on a shallow copy.
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		// Copy arrays so that an array owned by the caller is not rewritten in place.
		nodes = $.isArray( children ) ? children.slice() : [ children ];

	for ( i = 0, len = nodes.length; i < len; i++ ) {
		child = nodes[ i ];
		if ( typeof child !== 'object' ) {
			// Strings, numbers, etc. become text nodes so jQuery never parses them as HTML.
			child = document.createTextNode( child );
		}
		if ( child instanceof jQuery && child.hasClass( 'mediaWiki_htmlEmitter' ) ) {
			// Unwrap our synthetic span and pass its contents upwards.
			child = child.contents();
		}
		nodes[ i ] = child;
	}

	return $parent.append( nodes );
}
90
/**
 * Decode the five entities produced by mw.html.escape: &#039; &quot; &lt; &gt; &amp;.
 *
 * The ampersand is decoded last so that a doubly-encoded sequence such as
 * "&amp;lt;" is only decoded one level (to "&lt;").
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var i,
		decoded = encoded,
		// Applied strictly in this order.
		entities = [
			[ /&#039;/g, '\'' ],
			[ /&quot;/g, '"' ],
			[ /&lt;/g, '<' ],
			[ /&gt;/g, '>' ],
			[ /&amp;/g, '&' ]
		];

	for ( i = 0; i < entities.length; i++ ) {
		decoded = decoded.replace( entities[ i ][ 0 ], entities[ i ][ 1 ] );
	}
	return decoded;
}
106
/**
 * Convert the input to a string; jQuery objects are reduced to their text content first.
 *
 * @private
 * @param {string|jQuery} input
 * @return {string} Textual value of input
 */
function textify( input ) {
	return String( input instanceof jQuery ? input.text() : input );
}
120
/**
 * Given parser options, build a function that parses a message key with its replacements
 * and never throws on a parse error.
 *
 * The returned function creates a new parser on each call. When parsing throws, the error
 * is reported through mw.log.warn and mw.track ('mediawiki.jqueryMsg.error'), and a <span>
 * whose text is the raw message (as returned by the message store) is returned instead, so
 * a broken message does not halt script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	return function ( args ) {
		var rawMessage,
			parser = new mw.jqueryMsg.parser( options ),
			messageKey = args[ 0 ],
			params = args[ 1 ];

		// Replacements are either one array in second position or all remaining elements.
		if ( !$.isArray( params ) ) {
			params = slice.call( args, 1 );
		}

		try {
			return parser.parse( messageKey, params );
		} catch ( err ) {
			rawMessage = parser.settings.messages.get( messageKey );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + messageKey + ': ' + err.message );
			mw.track( 'mediawiki.jqueryMsg.error', {
				messageKey: messageKey,
				errorMessage: err.message
			} );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
153
// Namespace object for everything this module exposes.
mw.jqueryMsg = {};

/**
 * Merge the given values into the shared parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. To override this data for individual
 * parsers, pass the relevant options to mw.jqueryMsg.parser.
 *
 * The merge is shallow: a key present in `data` replaces the default of the same name.
 *
 * @private
 * @param {Object} data
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	// $.extend modifies its first argument in place.
	$.extend( parserDefaults, data );
};
169
/**
 * Get current parser defaults.
 *
 * Primarily used for the unit test. Returns a shallow copy: the top-level object is
 * new, nested arrays and objects are shared with the live defaults.
 *
 * @private
 * @return {Object}
 */
mw.jqueryMsg.getParserDefaults = function () {
	var copy = {};
	return $.extend( copy, parserDefaults );
};
181
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The output format is fixed when this function is called: options.format if defined,
 * otherwise the current parser default. For 'text' and 'escaped' the text of the parsed
 * result is returned, for any other format its HTML.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered HTML.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn,
		format = ( options && options.format !== undefined ) ?
			options.format :
			parserDefaults.format;

	return function () {
		var $parsed;

		// The parser function is created lazily, on first use.
		if ( !failableParserFn ) {
			failableParserFn = getFailableParserFn( options );
		}
		$parsed = failableParserFn( arguments );

		return ( format === 'text' || format === 'escaped' ) ?
			$parsed.text() :
			$parsed.html();
	};
};
224
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *       $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *       var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *       $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin empties 'this' (the selected elements in a jQuery plugin context) and then
 * appends the parsed message to it.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn;

	return function () {
		var $target;

		// The parser function is created lazily, on first use.
		if ( !failableParserFn ) {
			failableParserFn = getFailableParserFn( options );
		}

		$target = this.empty();
		appendWithoutParsing( $target, failableParserFn( arguments ) );
		return $target;
	};
};
259
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * Settings are the parser defaults overridden by `options`. For the 'text' and
 * 'escaped' formats only curly-brace transformation is applied when parsing.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options );

	settings.onlyCurlyBraceTransform = settings.format === 'text' || settings.format === 'escaped';

	this.settings = settings;
	// Parsed syntax trees, keyed by message key (see getAst).
	this.astCache = {};
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
275
276 mw.jqueryMsg.parser.prototype = {
277 /**
278 * Where the magic happens.
279 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
280 * If an error is thrown, returns original key, and logs the error
281 *
282 * @param {string} key Message key.
283 * @param {Array} replacements Variable replacements for $1, $2... $n
284 * @return {jQuery}
285 */
286 parse: function ( key, replacements ) {
287 var ast = this.getAst( key );
288 return this.emitter.emit( ast, replacements );
289 },
290
291 /**
292 * Fetch the message string associated with a key, return parsed structure. Memoized.
293 * Note that we pass '[' + key + ']' back for a missing message here.
294 *
295 * @param {string} key
296 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
297 */
298 getAst: function ( key ) {
299 var wikiText;
300
301 if ( !this.astCache.hasOwnProperty( key ) ) {
302 wikiText = this.settings.messages.get( key );
303 if ( typeof wikiText !== 'string' ) {
304 wikiText = '\\[' + key + '\\]';
305 }
306 this.astCache[ key ] = this.wikiTextToAst( wikiText );
307 }
308 return this.astCache[ key ];
309 },
310
311 /**
312 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
313 *
314 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
315 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
316 *
317 * @param {string} input Message string wikitext
318 * @throws Error
319 * @return {Mixed} abstract syntax tree
320 */
321 wikiTextToAst: function ( input ) {
var
	// Indicates current position in input as we parse through it.
	// Shared among all parsing functions below.
	pos = 0,
	// Single-character literal parsers
	regularLiteral, regularLiteralWithoutBar, regularLiteralWithoutSpace, regularLiteralWithSquareBrackets,
	doubleQuote, singleQuote, backslash, anyCharacter, asciiAlphabetLiteral,
	escapedOrLiteralWithoutSpace, escapedOrLiteralWithoutBar, escapedOrRegularLiteral,
	// Tokens
	whitespace, dollar, digits, htmlDoubleQuoteAttributeValue, htmlSingleQuoteAttributeValue,
	htmlAttributeEquals, openHtmlStartTag, optionalForwardSlash, openHtmlEndTag, closeHtmlTag,
	openExtlink, closeExtlink, wikilinkContents, openWikilink, closeWikilink, templateName, pipe, colon,
	templateContents, openTemplate, closeTemplate,
	// Composite expressions and the final parse result
	nonWhitespaceExpression, paramExpression, expression, curlyBraceTransformExpression, result,
	settings = this.settings,
	concat = Array.prototype.concat;
337
338 // =========================================================
339 // parsing combinators - could be a library on its own
340 // =========================================================
341
/**
 * Build a parser that tries the given parsers in order and returns the first result
 * that is not null; returns null when every one of them returns null.
 *
 * @private
 * @param {Function[]} ps
 * @return {Function} Parser returning the first non-null result, or null
 */
function choice( ps ) {
	return function () {
		var outcome,
			index = 0;

		while ( index < ps.length ) {
			outcome = ps[ index ]();
			if ( outcome !== null ) {
				return outcome;
			}
			index++;
		}
		return null;
	};
}
361
/**
 * Run several parsers in a row; all must succeed or null is returned and the shared
 * position is reset to where it was on entry.
 * This is the only eager one: it runs immediately instead of returning a parser.
 *
 * @private
 * @param {Function[]} ps
 * @return {Array|null} Results of the parsers in order, or null if any of them failed
 */
function sequence( ps ) {
	var item,
		index = 0,
		collected = [],
		startPos = pos;

	while ( index < ps.length ) {
		item = ps[ index ]();
		if ( item === null ) {
			// Undo whatever the earlier parsers consumed.
			pos = startPos;
			return null;
		}
		collected.push( item );
		index++;
	}
	return collected;
}
384
/**
 * Build a parser that runs the same parser over and over until it returns null.
 * It must succeed a minimum of n times, otherwise the shared position is reset and
 * null is returned.
 *
 * @private
 * @param {number} n
 * @param {Function} p
 * @return {Function} Parser returning the array of results, or null
 */
function nOrMore( n, p ) {
	return function () {
		var item,
			matches = [],
			startPos = pos;

		for ( item = p(); item !== null; item = p() ) {
			matches.push( item );
		}
		if ( matches.length < n ) {
			pos = startPos;
			return null;
		}
		return matches;
	};
}
410
/**
 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser to run
 * @param {Function} fn Applied to the result of p when that result is not null
 * @return {Function} Parser returning fn( result ), or null
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();

		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
428
/**
 * Make a parser that matches one fixed string at the current position.
 * On a match the position advances past the string and the string is returned;
 * otherwise null is returned and the position is unchanged.
 *
 * @private
 * @param {string} s
 * @return {Function}
 * @return {string|null} return.return
 */
function makeStringParser( s ) {
	var len = s.length;

	return function () {
		if ( input.substr( pos, len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
448
/**
 * Makes a regex parser, given a RegExp object.
 * The regex being passed in should start with a ^ to anchor it to the start
 * of the string, because it is matched against the input from the current position on.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} Parser returning the matched text (advancing the position by its
 *  length), or null when the regex does not match
 */
function makeRegexParser( regex ) {
	return function () {
		var token,
			remaining = input.slice( pos ),
			matches = remaining.match( regex );

		if ( matches !== null ) {
			token = matches[ 0 ];
			pos += token.length;
			return token;
		}
		return null;
	};
}
468
469 // ===================================================================
470 // General patterns above this line -- wikitext specific parsers below
471 // ===================================================================
472
473 // Parsing functions follow. All parsing functions work like this:
474 // They don't accept any arguments.
475 // Instead, they just operate non destructively on the string 'input'
476 // As they can consume parts of the string, they advance the shared variable pos,
477 // and return tokens (or whatever else they want to return).
478 // some things are defined as closures and other things as ordinary functions
479 // converting everything to a closure makes it a lot harder to debug... errors pop up
480 // but some debuggers can't tell you exactly where they come from. Also the mutually
481 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
482 // This may be because, to save code, memoization was removed
483
// Quoting and escaping characters
backslash = makeStringParser( '\\' );
doubleQuote = makeStringParser( '"' );
singleQuote = makeStringParser( '\'' );
// One character of any kind that `.` matches
anyCharacter = makeRegexParser( /^./ );

// Single-character literals; each variant differs only in which characters it refuses
regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );
regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );

// Pieces of HTML tags
openHtmlStartTag = makeStringParser( '<' );
openHtmlEndTag = makeStringParser( '</' );
optionalForwardSlash = makeRegexParser( /^\/?/ );
htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
closeHtmlTag = makeRegexParser( /^\s*>/ );
499
// A backslash followed by any character; yields the character without the backslash.
function escapedLiteral() {
	var pair = sequence( [ backslash, anyCharacter ] );

	if ( pair === null ) {
		return null;
	}
	return pair[ 1 ];
}
// Each of these accepts an escaped character first, then falls back to one literal character.
escapedOrRegularLiteral = choice( [ escapedLiteral, regularLiteral ] );
escapedOrLiteralWithoutBar = choice( [ escapedLiteral, regularLiteralWithoutBar ] );
escapedOrLiteralWithoutSpace = choice( [ escapedLiteral, regularLiteralWithoutSpace ] );
// Used to define "literals" without spaces, in space-delimited situations.
// Joins one or more matched characters into a single string.
function literalWithoutSpace() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutSpace )();

	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
// Used to define "literals" within template parameters. The pipe character is the parameter delimiter, so by default
// it is not a literal in the parameter.
// Joins one or more matched characters into a single string.
function literalWithoutBar() {
	var chars = nOrMore( 1, escapedOrLiteralWithoutBar )();

	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
530
// One or more escaped or regular literal characters, joined into a single string.
function literal() {
	var chars = nOrMore( 1, escapedOrRegularLiteral )();

	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
535
// Literal text for the curly-brace-only mode: square brackets count as ordinary characters
// here and no backslash escapes are processed.
function curlyBraceTransformExpressionLiteral() {
	var chars = nOrMore( 1, regularLiteralWithSquareBrackets )();

	if ( chars === null ) {
		return null;
	}
	return chars.join( '' );
}
540
// Generic tokens
whitespace = makeRegexParser( /^\s+/ );
digits = makeRegexParser( /^\d+/ );
dollar = makeStringParser( '$' );

// HTML tag / attribute names, and attribute values up to the closing quote
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );
548
// A placeholder such as $1. Yields [ 'REPLACE', n ] where n is the zero-based
// parameter index (the number written in the message minus one).
function replacement() {
	var parsed = sequence( [ dollar, digits ] );

	return parsed === null ?
		null :
		[ 'REPLACE', parseInt( parsed[ 1 ], 10 ) - 1 ];
}
openExtlink = makeStringParser( '[' );
closeExtlink = makeStringParser( ']' );

// External link: [target text]
// this extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
// Yields [ 'EXTLINK', target, [ 'CONCAT', ...text ] ], or null when there is no match.
function extlink() {
	var targetNodes, target,
		parts = sequence( [
			openExtlink,
			nOrMore( 1, nonWhitespaceExpression ),
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parts === null ) {
		return null;
	}

	targetNodes = parts[ 1 ];
	// When the entire link target is a single parameter, we can't use CONCAT, as we allow
	// passing fancy parameters (like a whole jQuery object or a function) to use for the
	// link. Check only if it's a single match, since we can either do CONCAT or not for
	// singles with the same effect.
	if ( targetNodes.length === 1 ) {
		target = targetNodes[ 0 ];
	} else {
		target = [ 'CONCAT' ].concat( targetNodes );
	}

	return [ 'EXTLINK', target, [ 'CONCAT' ].concat( parts[ 3 ] ) ];
}
// Parameter separator, used inside wikilinks and templates
pipe = makeStringParser( '|' );
// Wikilink delimiters
openWikilink = makeStringParser( '[[' );
closeWikilink = makeStringParser( ']]' );
591
// A template: {{ ... }}. Yields whatever templateContents produced for the inside.
function template() {
	var parts = sequence( [ openTemplate, templateContents, closeTemplate ] );

	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
600
// Inside of a wikilink with a pipe: target|anchor text.
// Yields [ [ 'CONCAT', ...target ], [ 'CONCAT', ...anchor ] ].
function pipedWikilink() {
	var parts = sequence( [
		nOrMore( 1, paramExpression ),
		pipe,
		nOrMore( 1, expression )
	] );

	if ( parts === null ) {
		return null;
	}
	return [
		[ 'CONCAT' ].concat( parts[ 0 ] ),
		[ 'CONCAT' ].concat( parts[ 2 ] )
	];
}
612
// Inside of a wikilink without a pipe. Yields [ [ 'CONCAT', ...target ] ].
function unpipedWikilink() {
	// nOrMore resets the position itself when it fails.
	var targetNodes = nOrMore( 1, paramExpression )();

	if ( targetNodes === null ) {
		return null;
	}
	return [ [ 'CONCAT' ].concat( targetNodes ) ];
}
621
// The piped form is tried first; the unpiped form is the fallback.
wikilinkContents = choice( [ pipedWikilink, unpipedWikilink ] );

// A wikilink: [[ ... ]]. Yields [ 'WIKILINK', target ] or [ 'WIKILINK', target, anchor ].
function wikilink() {
	var parts = sequence( [ openWikilink, wikilinkContents, closeWikilink ] );

	if ( parts === null ) {
		return null;
	}
	// concat spreads the one or two content nodes after the operation name.
	return [ 'WIKILINK' ].concat( parts[ 1 ] );
}
642
// TODO: Support data- if appropriate
// An attribute value in double quotes; yields the text between the quotes.
function doubleQuotedHtmlAttributeValue() {
	var parts = sequence( [ doubleQuote, htmlDoubleQuoteAttributeValue, doubleQuote ] );

	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
652
// An attribute value in single quotes; yields the text between the quotes.
function singleQuotedHtmlAttributeValue() {
	var parts = sequence( [ singleQuote, htmlSingleQuoteAttributeValue, singleQuote ] );

	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}
661
// One attribute: whitespace, a name, an equals sign and a quoted value.
// Yields the pair [ name, value ].
function htmlAttribute() {
	var quotedValue = choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] ),
		parts = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			quotedValue
		] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 1 ], parts[ 3 ] ];
}
674
/**
 * Checks if HTML is allowed.
 *
 * The element is allowed when the start and end tag names match, the tag name is in
 * settings.allowedHtmlElements, and every attribute name is either in
 * settings.allowedHtmlCommonAttributes or in the list that
 * settings.allowedHtmlAttributesByElement holds for this element.
 *
 * Tag names and attribute names are both compared case-insensitively (lower-cased
 * before lookup), so `<B CLASS="x">` is treated like `<b class="x">`.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {string[]} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var i, len, attributeName, elementAttributes;

	startTagName = startTagName.toLowerCase();
	endTagName = endTagName.toLowerCase();
	if ( startTagName !== endTagName || $.inArray( startTagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	elementAttributes = settings.allowedHtmlAttributesByElement[ startTagName ] || [];
	// Only the even indexes hold names; the odd ones hold values.
	for ( i = 0, len = attributes.length; i < len; i += 2 ) {
		// The whitelists are lower case, so normalise the name like the tag name above.
		attributeName = attributes[ i ].toLowerCase();
		if ( $.inArray( attributeName, settings.allowedHtmlCommonAttributes ) === -1 &&
			$.inArray( attributeName, elementAttributes ) === -1 ) {
			return false;
		}
	}

	return true;
}
703
// Zero or more attributes.
// Yields the flat list [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
function htmlAttributes() {
	var pairs = nOrMore( 0, htmlAttribute )(),
		head = [ 'HTMLATTRIBUTES' ];

	// Un-nest attributes array due to structure of jQueryMsg operations (see emit):
	// concat is applied with every [ name, value ] pair as its own argument.
	return concat.apply( head, pairs );
}
709
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// Parsing happens in three steps so the original markup can be reconstructed and an
// exact tag name match can be required:
// 1. start tag, from '<' through '>'
// 2. contents (zero or more expressions)
// 3. end tag, from '</' through '>'
// The positions around each tag are recorded so that the tag text can be emitted
// verbatim when the HTML is to be treated as text.
function html() {
	var openTag, closeTag, contents, wrappedAttributes,
		openTagText, closeTagText,
		openTagStart, closeTagStart;

	openTagStart = pos;
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openTagText = input.slice( openTagStart, pos );

	contents = nOrMore( 0, expression )();

	closeTagStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag and contents.
		return [ 'CONCAT', openTagText ].concat( contents );
	}
	closeTagText = input.slice( closeTagStart, pos );

	// First entry of the wrapped list is the 'HTMLATTRIBUTES' marker; the rest are
	// the name/value pairs that isAllowedHtml checks.
	wrappedAttributes = openTag[ 2 ];
	if ( !isAllowedHtml( openTag[ 1 ], closeTag[ 1 ], wrappedAttributes.slice( 1 ) ) ) {
		// HTML is not allowed, so contents will remain how
		// it was, while HTML markup at this level will be
		// treated as text
		// E.g. assuming script tags are not allowed:
		//
		// <script>[[Foo|bar]]</script>
		//
		// results in '&lt;script&gt;' and '&lt;/script&gt;'
		// (not treated as an HTML tag), surrounding a fully
		// parsed HTML link.
		//
		// Concatenate everything from the tag, flattening the contents.
		return [ 'CONCAT', openTagText ].concat( contents, closeTagText );
	}

	return [ 'HTMLELEMENT', openTag[ 1 ], wrappedAttributes ].concat( contents );
}
781
// <nowiki>...</nowiki> tag. The tags are stripped and the contents are returned unparsed,
// wrapped as [ 'CONCAT', contents ].
function nowiki() {
	var parts = sequence( [
		makeStringParser( '<nowiki>' ),
		// We use a greedy non-backtracking parser, so we must ensure here that we don't take too much:
		// the lazy match stops right before the first closing tag.
		makeRegexParser( /^.*?(?=<\/nowiki>)/ ),
		makeStringParser( '</nowiki>' )
	] );

	if ( parts === null ) {
		return null;
	}
	return [ 'CONCAT' ].concat( parts[ 1 ] );
}
800
// Name of a template or parser function, returned as a string.
templateName = transform(
	// see $wgLegalTitleChars
	// not allowing : due to the need to catch "PLURAL:$1"
	makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
	function ( matched ) {
		return matched.toString();
	}
);
// One template parameter: a pipe followed by zero or more parameter expressions.
// An empty parameter yields undefined (the first element of an empty list).
function templateParam() {
	var nodes,
		parts = sequence( [
			pipe,
			nOrMore( 0, paramExpression )
		] );

	if ( parts === null ) {
		return null;
	}

	nodes = parts[ 1 ];
	// use a CONCAT operator if there are multiple nodes, otherwise return the first node, raw.
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[ 0 ];
}
820
// Template head of the form NAME:$n. Yields [ name, replacementNode ].
function templateWithReplacement() {
	var parts = sequence( [ templateName, colon, replacement ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// Template head of the form NAME:expression. Yields [ name, expressionNode ].
function templateWithOutReplacement() {
	var parts = sequence( [ templateName, colon, paramExpression ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}
// Template head of the form NAME: with nothing after the colon.
// Yields [ name, '' ], i.e. an empty first parameter.
function templateWithOutFirstParameter() {
	var parts = sequence( [ templateName, colon ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], '' ];
}
colon = makeStringParser( ':' );

// Everything between {{ and }}. Yields [ name, firstParam, ...otherParams ] for the
// NAME:... forms, or [ name, ...params ] when the name is not followed by a colon.
templateContents = choice( [
	function () {
		var parts = sequence( [
			// templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
			// or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
			choice( [ templateWithReplacement, templateWithOutReplacement, templateWithOutFirstParameter ] ),
			nOrMore( 0, templateParam )
		] );

		if ( parts === null ) {
			return null;
		}
		return parts[ 0 ].concat( parts[ 1 ] );
	},
	function () {
		var parts = sequence( [
			templateName,
			nOrMore( 0, templateParam )
		] );

		return parts === null ? null : [ parts[ 0 ] ].concat( parts[ 1 ] );
	}
] );
// Template delimiters
openTemplate = makeStringParser( '{{' );
closeTemplate = makeStringParser( '}}' );

// Expression allowed where whitespace ends the value: literals stop at whitespace
nonWhitespaceExpression = choice( [ template, wikilink, extlink, replacement, literalWithoutSpace ] );

// Expression allowed inside template parameters: literals stop at a pipe
paramExpression = choice( [ template, wikilink, extlink, replacement, literalWithoutBar ] );

// Full expression, including <nowiki> and HTML
expression = choice( [ template, wikilink, extlink, replacement, nowiki, html, literal ] );

// Used when only {{-transformation is wanted, for 'text'
// or 'escaped' formats
curlyBraceTransformExpression = choice( [ template, replacement, curlyBraceTransformExpressionLiteral ] );
900
/**
 * Starts the parse: applies the root expression as many times as it matches.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} [ 'CONCAT', ...nodes ], or null if the repetition returned null
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();

	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
913 // everything above this point is supposed to be stateless/static, but
914 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
915 // finally let's do some actual work...
916
917 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
918
919 /*
920 * For success, the p must have gotten to the end of the input
921 * and returned a non-null.
922 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
923 */
924 if ( result === null || pos !== input.length ) {
925 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
926 }
927 return result;
928 }
929
930 };
931
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Every magic word becomes a method, named after the lower-cased word, that returns
 * the configured value.
 *
 * @param {Object} language Stored as this.language
 * @param {Object} magic Map of magic word to value
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var emitter = this;

	this.language = language;
	$.each( magic, function ( word, value ) {
		emitter[ word.toLowerCase() ] = function () {
			return value;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 *
	 * Strings and numbers are returned unchanged and undefined becomes the empty string.
	 * An array node is an operation: its first element names the emitter method (matched
	 * in lower case) and the remaining elements are emitted first and passed to it.
	 *
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 * @throws {Error} For an unknown operation or a node of an unexpected type
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			self = this,
			nodeType = typeof node;

		if ( nodeType === 'string' || nodeType === 'number' ) {
			return node;
		}
		if ( nodeType === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
			// Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
			// The logical thing is probably to return the empty string here when we encounter undefined.
			return '';
		}
		if ( nodeType !== 'object' ) {
			throw new Error( 'Unexpected type in AST: ' + typeof node );
		}

		// typeof returns object for arrays: node is an array of nodes
		subnodes = $.map( node.slice( 1 ), function ( child ) {
			return self.emit( child, replacements );
		} );
		operation = node[ 0 ].toLowerCase();
		if ( typeof self[ operation ] !== 'function' ) {
			throw new Error( 'Unknown operation "' + operation + '"' );
		}
		return self[ operation ]( subnodes, replacements );
	};
};
985
986 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
987 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
988 // If you have 'magic words' then configure the parser to have them upon creation.
989 //
// An emitter method takes the array of already-emitted subnodes and the array of replacements (the values that $1, $2... should translate to).
991 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
992 mw.jqueryMsg.htmlEmitter.prototype = {
993 /**
* Parsing has been applied depth-first, so we can assume that all nodes here are single nodes.
995 * Must return a single node to parents -- a jQuery with synthetic span
996 * However, unwrap any other synthetic spans in our children and pass them upwards
997 *
998 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
999 * @return {jQuery}
1000 */
1001 concat: function ( nodes ) {
1002 var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
1003 $.each( nodes, function ( i, node ) {
1004 // Let jQuery append nodes, arrays of nodes and jQuery objects
1005 // other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
1006 appendWithoutParsing( $span, node );
1007 } );
1008 return $span;
1009 },
1010
1011 /**
1012 * Return escaped replacement of correct index, or string if unavailable.
1013 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
1014 * if the specified parameter is not found return the same string
1015 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" )
1016 *
1017 * TODO: Throw error if nodes.length > 1 ?
1018 *
1019 * @param {Array} nodes List of one element, integer, n >= 0
1020 * @param {Array} replacements List of at least n strings
1021 * @return {string} replacement
1022 */
1023 replace: function ( nodes, replacements ) {
1024 var index = parseInt( nodes[ 0 ], 10 );
1025
1026 if ( index < replacements.length ) {
1027 return replacements[ index ];
1028 } else {
1029 // index not found, fallback to displaying variable
1030 return '$' + ( index + 1 );
1031 }
1032 },
1033
1034 /**
1035 * Transform wiki-link
1036 *
1037 * TODO:
1038 * It only handles basic cases, either no pipe, or a pipe with an explicit
1039 * anchor.
1040 *
1041 * It does not attempt to handle features like the pipe trick.
1042 * However, the pipe trick should usually not be present in wikitext retrieved
1043 * from the server, since the replacement is done at save time.
1044 * It may, though, if the wikitext appears in extension-controlled content.
1045 *
1046 * @param {string[]} nodes
1047 */
1048 wikilink: function ( nodes ) {
1049 var page, anchor, url, $el;
1050
1051 page = textify( nodes[ 0 ] );
1052 // Strip leading ':', which is used to suppress special behavior in wikitext links,
1053 // e.g. [[:Category:Foo]] or [[:File:Foo.jpg]]
1054 if ( page.charAt( 0 ) === ':' ) {
1055 page = page.slice( 1 );
1056 }
1057 url = mw.util.getUrl( page );
1058
1059 if ( nodes.length === 1 ) {
1060 // [[Some Page]] or [[Namespace:Some Page]]
1061 anchor = page;
1062 } else {
1063 // [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
1064 anchor = nodes[ 1 ];
1065 }
1066
1067 $el = $( '<a>' ).attr( {
1068 title: page,
1069 href: url
1070 } );
1071 return appendWithoutParsing( $el, anchor );
1072 },
1073
1074 /**
1075 * Converts array of HTML element key value pairs to object
1076 *
1077 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
1078 * name and 2 * n + 1 the associated value
1079 * @return {Object} Object mapping attribute name to attribute value
1080 */
1081 htmlattributes: function ( nodes ) {
1082 var i, len, mapping = {};
1083 for ( i = 0, len = nodes.length; i < len; i += 2 ) {
1084 mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
1085 }
1086 return mapping;
1087 },
1088
1089 /**
1090 * Handles an (already-validated) HTML element.
1091 *
1092 * @param {Array} nodes Nodes to process when creating element
1093 * @return {jQuery|Array} jQuery node for valid HTML or array for disallowed element
1094 */
1095 htmlelement: function ( nodes ) {
1096 var tagName, attributes, contents, $element;
1097
1098 tagName = nodes.shift();
1099 attributes = nodes.shift();
1100 contents = nodes;
1101 $element = $( document.createElement( tagName ) ).attr( attributes );
1102 return appendWithoutParsing( $element, contents );
1103 },
1104
1105 /**
1106 * Transform parsed structure into external link.
1107 *
1108 * The "href" can be:
1109 * - a jQuery object, treat it as "enclosing" the link text.
1110 * - a function, treat it as the click handler.
1111 * - a string, or our htmlEmitter jQuery object, treat it as a URI after stringifying.
1112 *
1113 * TODO: throw an error if nodes.length > 2 ?
1114 *
1115 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {string}
1116 * @return {jQuery}
1117 */
1118 extlink: function ( nodes ) {
1119 var $el,
1120 arg = nodes[ 0 ],
1121 contents = nodes[ 1 ];
1122 if ( arg instanceof jQuery && !arg.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1123 $el = arg;
1124 } else {
1125 $el = $( '<a>' );
1126 if ( typeof arg === 'function' ) {
1127 $el.attr( 'href', '#' )
1128 .click( function ( e ) {
1129 e.preventDefault();
1130 } )
1131 .click( arg );
1132 } else {
1133 $el.attr( 'href', textify( arg ) );
1134 }
1135 }
1136 return appendWithoutParsing( $el.empty(), contents );
1137 },
1138
1139 /**
1140 * Transform parsed structure into pluralization
1141 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
1142 * So convert it back with the current language's convertNumber.
1143 *
1144 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
1145 * @return {string} selected pluralized form according to current language
1146 */
1147 plural: function ( nodes ) {
1148 var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
1149 explicitPluralForms = {};
1150
1151 count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
1152 forms = nodes.slice( 1 );
1153 for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
1154 form = forms[ formIndex ];
1155
1156 if ( form instanceof jQuery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
1157 // This is a nested node, may be an explicit plural form like 5=[$2 linktext]
1158 firstChild = form.contents().get( 0 );
1159 if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
1160 firstChildText = firstChild.textContent;
1161 if ( /^\d+=/.test( firstChildText ) ) {
1162 explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
1163 // Use the digit part as key and rest of first text node and
1164 // rest of child nodes as value.
1165 firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
1166 explicitPluralForms[ explicitPluralFormNumber ] = form;
1167 forms[ formIndex ] = undefined;
1168 }
1169 }
1170 } else if ( /^\d+=/.test( form ) ) {
1171 // Simple explicit plural forms like 12=a dozen
1172 explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
1173 explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
1174 forms[ formIndex ] = undefined;
1175 }
1176 }
1177
1178 // Remove explicit plural forms from the forms. They were set undefined in the above loop.
1179 forms = $.map( forms, function ( form ) {
1180 return form;
1181 } );
1182
1183 return this.language.convertPlural( count, forms, explicitPluralForms );
1184 },
1185
1186 /**
1187 * Transform parsed structure according to gender.
1188 *
1189 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
1190 *
1191 * The first node must be one of:
1192 * - the mw.user object (or a compatible one)
1193 * - an empty string - indicating the current user, same effect as passing the mw.user object
1194 * - a gender string ('male', 'female' or 'unknown')
1195 *
1196 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
1197 * @return {string} Selected gender form according to current language
1198 */
1199 gender: function ( nodes ) {
1200 var gender,
1201 maybeUser = nodes[ 0 ],
1202 forms = nodes.slice( 1 );
1203
1204 if ( maybeUser === '' ) {
1205 maybeUser = mw.user;
1206 }
1207
1208 // If we are passed a mw.user-like object, check their gender.
1209 // Otherwise, assume the gender string itself was passed .
1210 if ( maybeUser && maybeUser.options instanceof mw.Map ) {
1211 gender = maybeUser.options.get( 'gender' );
1212 } else {
1213 gender = maybeUser;
1214 }
1215
1216 return this.language.gender( gender, forms );
1217 },
1218
1219 /**
1220 * Transform parsed structure into grammar conversion.
1221 * Invoked by putting `{{grammar:form|word}}` in a message
1222 *
1223 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
1224 * @return {string} selected grammatical form according to current language
1225 */
1226 grammar: function ( nodes ) {
1227 var form = nodes[ 0 ],
1228 word = nodes[ 1 ];
1229 return word && form && this.language.convertGrammar( word, form );
1230 },
1231
1232 /**
* Transform parsed structure into an int: (interface language) message include
1234 * Invoked by putting `{{int:othermessage}}` into a message
1235 *
1236 * @param {Array} nodes List of nodes
1237 * @return {string} Other message
1238 */
1239 'int': function ( nodes ) {
1240 var msg = nodes[ 0 ];
1241 return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
1242 },
1243
1244 /**
1245 * Get localized namespace name from canonical name or namespace number.
1246 * Invoked by putting `{{ns:foo}}` into a message
1247 *
1248 * @param {Array} nodes List of nodes
1249 * @return {string} Localized namespace name
1250 */
1251 ns: function ( nodes ) {
1252 var ns = $.trim( textify( nodes[ 0 ] ) );
1253 if ( !/^\d+$/.test( ns ) ) {
1254 ns = mw.config.get( 'wgNamespaceIds' )[ ns.replace( / /g, '_' ).toLowerCase() ];
1255 }
1256 ns = mw.config.get( 'wgFormattedNamespaces' )[ ns ];
1257 return ns || '';
1258 },
1259
1260 /**
* Takes an unformatted number (Arabic digits, no group separators, and "." as the decimal separator)
1262 * and outputs it in the localized digit script and formatted with decimal
1263 * separator, according to the current language.
1264 *
1265 * @param {Array} nodes List of nodes
1266 * @return {number|string} Formatted number
1267 */
1268 formatnum: function ( nodes ) {
1269 var isInteger = ( nodes[ 1 ] && nodes[ 1 ] === 'R' ) ? true : false,
1270 number = nodes[ 0 ];
1271
1272 return this.language.convertNumber( number, isInteger );
1273 },
1274
1275 /**
1276 * Lowercase text
1277 *
1278 * @param {Array} nodes List of nodes
1279 * @return {string} The given text, all in lowercase
1280 */
1281 lc: function ( nodes ) {
1282 return textify( nodes[ 0 ] ).toLowerCase();
1283 },
1284
1285 /**
1286 * Uppercase text
1287 *
1288 * @param {Array} nodes List of nodes
1289 * @return {string} The given text, all in uppercase
1290 */
1291 uc: function ( nodes ) {
1292 return textify( nodes[ 0 ] ).toUpperCase();
1293 },
1294
1295 /**
1296 * Lowercase first letter of input, leaving the rest unchanged
1297 *
1298 * @param {Array} nodes List of nodes
1299 * @return {string} The given text, with the first character in lowercase
1300 */
1301 lcfirst: function ( nodes ) {
1302 var text = textify( nodes[ 0 ] );
1303 return text.charAt( 0 ).toLowerCase() + text.slice( 1 );
1304 },
1305
1306 /**
1307 * Uppercase first letter of input, leaving the rest unchanged
1308 *
1309 * @param {Array} nodes List of nodes
1310 * @return {string} The given text, with the first character in uppercase
1311 */
1312 ucfirst: function ( nodes ) {
1313 var text = textify( nodes[ 0 ] );
1314 return text.charAt( 0 ).toUpperCase() + text.slice( 1 );
1315 }
1316 };
1317
1318 // Deprecated! don't rely on gM existing.
1319 // The window.gM ought not to be required - or if required, not required here.
1320 // But moving it to extensions breaks it (?!)
1321 // Need to fix plugin so it could do attributes as well, then will be okay to remove this.
1322 // @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead.'
);

/**
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();

// Swap the default message parser for jqueryMsg, keeping the old one as a fallback
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	var format = this.format,
		messageMap = this.map;

	// The 'plain' format, and messages containing none of "{{", "[", "<", ">" or "&",
	// are left to mw.msg's simple parser
	if ( format === 'plain' || !/\{\{|[\[<>&]/.test( messageMap.get( this.key ) ) ) {
		return oldParser.apply( this );
	}

	// Create the message function for this format on first use and keep it on the map
	if ( !messageMap.hasOwnProperty( format ) ) {
		messageMap[ format ] = mw.jqueryMsg.getMessageFunction( {
			messages: messageMap,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: format
		} );
	}
	return messageMap[ format ]( this.key, this.parameters );
};
1349
1350 /**
1351 * Parse the message to DOM nodes, rather than HTML string like #parse.
1352 *
1353 * This method is only available when jqueryMsg is loaded.
1354 *
1355 * @method parseDom
1356 * @member mw.Message
1357 * @return {jQuery}
1358 */
mw.Message.prototype.parseDom = ( function () {
	// One detached container shared by all calls; its contents are detached again
	// before each call returns.
	var $scratch = $( '<div>' );

	return function () {
		var $filled = $scratch.msg( this.key, this.parameters );
		return $filled.contents().detach();
	};
}() );
1365
1366 }( mediaWiki, jQuery ) );