Merge "SpecialMovepage: Convert form to use OOUI controls"
[lhc/web/wiklou.git] / resources / src / mediawiki / mediawiki.jqueryMsg.js
1 /*!
2 * Experimental advanced wikitext parser-emitter.
3 * See: https://www.mediawiki.org/wiki/Extension:UploadWizard/MessageParser for docs
4 *
5 * @author neilk@wikimedia.org
6 * @author mflaschen@wikimedia.org
7 */
8 ( function ( mw, $ ) {
9 /**
10 * @class mw.jqueryMsg
11 * @singleton
12 */
13
var oldParser;

// Local shortcut used to turn array-likes (e.g. arguments) into arrays.
var slice = Array.prototype.slice;

// Default settings merged into every mw.jqueryMsg.parser instance.
var parserDefaults = {
	// Magic words: each key becomes an emitter operation returning the value.
	magic: {
		SITENAME: mw.config.get( 'wgSiteName' )
	},

	// Whitelist of HTML element names accepted in wikitext.
	// Self-closing tags are not currently supported.
	// Can be populated via setPrivateData().
	allowedHtmlElements: [],

	// Attribute names accepted on every whitelisted element.
	// See Sanitizer::setupAttributeWhitelist
	allowedHtmlCommonAttributes: [
		// HTML
		'id',
		'class',
		'style',
		'lang',
		'dir',
		'title',

		// WAI-ARIA
		'role'
	],

	// Additional attributes accepted for specific elements only.
	// Key: element name in lower case.
	// Value: array of attribute names allowed on that element.
	allowedHtmlAttributesByElement: {},

	// Message store and language helpers used by the parser/emitter.
	messages: mw.messages,
	language: mw.language,

	// Same meaning as in mediawiki.js.
	//
	// Only 'text', 'parse', and 'escaped' are supported, and the
	// actual escaping for 'escaped' is done by other code (generally
	// through mediawiki.js).
	//
	// However, note that this default only applies to direct calls to
	// jqueryMsg. The default for mediawiki.js itself is 'text',
	// including when it uses jqueryMsg.
	format: 'parse'
};
58
/**
 * Wrapper around jQuery append that converts all non-objects to TextNode so append will not
 * convert what it detects as an htmlString to an element.
 *
 * Object elements of children (jQuery, HTMLElement, TextNode, etc.) will be left as is.
 *
 * The array passed in by the caller is never modified: the converted nodes are collected
 * in a new array. (Overwriting the caller's entries would replace its strings by TextNode
 * instances, and appending the same array a second time would then move those nodes
 * instead of creating new text.)
 *
 * @private
 * @param {jQuery} $parent Parent node wrapped by jQuery
 * @param {Object|string|Array} children What to append, with the same possible types as jQuery
 * @return {jQuery} $parent
 */
function appendWithoutParsing( $parent, children ) {
	var i, len, child,
		nodes = [];

	if ( !$.isArray( children ) ) {
		children = [ children ];
	}

	for ( i = 0, len = children.length; i < len; i++ ) {
		child = children[ i ];
		// Anything that is not an object (string, number, ...) becomes a text node,
		// so jQuery cannot interpret it as markup.
		nodes.push( typeof child !== 'object' ? document.createTextNode( child ) : child );
	}

	return $parent.append( nodes );
}
85
/**
 * Decodes the main HTML entities, those encoded by mw.html.escape:
 * &#039; &quot; &lt; &gt; and &amp;.
 *
 * Each entity in the input is decoded exactly once, so "&amp;lt;" yields "&lt;".
 *
 * @private
 * @param {string} encoded Encoded string
 * @return {string} String with those entities decoded
 */
function decodePrimaryHtmlEntities( encoded ) {
	var characterByEntity = {
		'&#039;': '\'',
		'&quot;': '"',
		'&lt;': '<',
		'&gt;': '>',
		'&amp;': '&'
	};

	return encoded.replace( /&(?:#039|quot|lt|gt|amp);/g, function ( entity ) {
		return characterByEntity[ entity ];
	} );
}
101
/**
 * Given parser options, build a function that parses a key plus replacements and
 * returns a jQuery object.
 *
 * The returned function never throws on a parse error. Instead it logs a warning through
 * mw.log.warn and returns a <span> whose text is the raw message as found in the
 * parser's message store, so the problem shows up in the interface without halting
 * script execution.
 *
 * @private
 * @param {Object} options Parser options
 * @return {Function}
 * @return {Array} return.args First element is the key, replacements may be in array in 2nd element, or remaining elements.
 * @return {jQuery} return.return
 */
function getFailableParserFn( options ) {
	var parser = new mw.jqueryMsg.parser( options );

	return function ( args ) {
		var rawMessage, replacements,
			key = args[ 0 ];

		// Replacements are either one array in second position, or all remaining arguments.
		if ( $.isArray( args[ 1 ] ) ) {
			replacements = args[ 1 ];
		} else {
			replacements = slice.call( args, 1 );
		}

		try {
			return parser.parse( key, replacements );
		} catch ( err ) {
			rawMessage = parser.settings.messages.get( key );
			mw.log.warn( 'mediawiki.jqueryMsg: ' + key + ': ' + err.message );
			return $( '<span>' ).text( rawMessage );
		}
	};
}
131
mw.jqueryMsg = {};

/**
 * Initialize parser defaults.
 *
 * ResourceLoaderJqueryMsgModule calls this to provide default values from
 * Sanitizer.php for allowed HTML elements. To override this data for individual
 * parsers, pass the relevant options to mw.jqueryMsg.parser.
 *
 * Only the allowedHtmlElements key of data is read; a missing or falsy value
 * leaves the current default in place.
 *
 * @private
 * @param {Object} data
 */
mw.jqueryMsg.setParserDefaults = function ( data ) {
	var elements = data.allowedHtmlElements;

	if ( elements ) {
		parserDefaults.allowedHtmlElements = elements;
	}
};
149
/**
 * Returns a function suitable for use as a global, to construct strings from the message key (and optional replacements).
 * e.g.
 *
 *       window.gM = mediaWiki.jqueryMsg.getMessageFunction( options );
 *       $( 'p#headline' ).html( gM( 'hello-user', username ) );
 *
 * Like the old gM() function this returns only strings, so it destroys any bindings. If you want to preserve bindings use the
 * jQuery plugin version instead. This is only included for backwards compatibility with gM().
 *
 * The returned function yields the rendered text content when the format is 'text' or
 * 'escaped', and the rendered inner HTML for any other format. The format is taken from
 * options.format when defined, otherwise from the parser defaults.
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * @param {Object} options parser options
 * @return {Function} Function suitable for assigning to window.gM
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {string} return.return Rendered text or HTML, depending on the format.
 */
mw.jqueryMsg.getMessageFunction = function ( options ) {
	var failableParserFn = getFailableParserFn( options ),
		format = ( options && options.format !== undefined ) ? options.format : parserDefaults.format,
		wantsPlainText = ( format === 'text' || format === 'escaped' );

	return function () {
		var $rendered = failableParserFn( arguments );

		return wantsPlainText ? $rendered.text() : $rendered.html();
	};
};
190
/**
 * Returns a jQuery plugin which parses the message in the message key, doing replacements optionally, and appends the nodes to
 * the current selector. Bindings to passed-in jquery elements are preserved. Functions become click handlers for [$1 linktext] links.
 * e.g.
 *
 *        $.fn.msg = mediaWiki.jqueryMsg.getPlugin( options );
 *        var userlink = $( '<a>' ).click( function () { alert( "hello!!" ) } );
 *        $( 'p#headline' ).msg( 'hello-user', userlink );
 *
 * N.B. replacements are variadic arguments or an array in second parameter. In other words:
 *    somefunction( a, b, c, d )
 * is equivalent to
 *    somefunction( a, [b, c, d] )
 *
 * The plugin empties 'this' (the selected elements in a jQuery plugin context) and then
 * appends the parsed nodes to it.
 *
 * @param {Object} options Parser options
 * @return {Function} Function suitable for assigning to jQuery plugin, such as jQuery#msg
 * @return {string} return.key Message key.
 * @return {Array|Mixed} return.replacements Optional variable replacements (variadically or an array).
 * @return {jQuery} return.return
 */
mw.jqueryMsg.getPlugin = function ( options ) {
	var failableParserFn = getFailableParserFn( options );

	return function () {
		var i, len,
			$target = this.empty(),
			// TODO: Simply appendWithoutParsing( $target, failableParserFn( arguments ).contents() )
			// or Simply appendWithoutParsing( $target, failableParserFn( arguments ) )
			parsedNodes = failableParserFn( arguments ).contents();

		for ( i = 0, len = parsedNodes.length; i < len; i++ ) {
			appendWithoutParsing( $target, parsedNodes[ i ] );
		}
		return $target;
	};
};
226
/**
 * The parser itself.
 * Describes an object, whose primary duty is to .parse() message keys.
 *
 * The effective settings are the parser defaults overlaid with the given options.
 * settings.onlyCurlyBraceTransform is derived from the format: it is true for the
 * 'text' and 'escaped' formats and false otherwise.
 *
 * @class
 * @private
 * @param {Object} options
 */
mw.jqueryMsg.parser = function ( options ) {
	var settings = $.extend( {}, parserDefaults, options ),
		format = settings.format;

	settings.onlyCurlyBraceTransform = ( format === 'text' || format === 'escaped' );

	this.settings = settings;
	this.emitter = new mw.jqueryMsg.htmlEmitter( settings.language, settings.magic );
};
241
242 mw.jqueryMsg.parser.prototype = {
243 /**
244 * Cache mapping MediaWiki message keys and the value onlyCurlyBraceTransform, to the AST of the message.
245 *
246 * In most cases, the message is a string so this is identical.
247 * (This is why we would like to move this functionality server-side).
248 *
249 * The two parts of the key are separated by colon. For example:
250 *
251 * "message-key:true": ast
252 *
253 * if the key is "message-key" and onlyCurlyBraceTransform is true.
254 *
255 * This cache is shared by all instances of mw.jqueryMsg.parser.
256 *
257 * NOTE: We promise, it's static - when you create this empty object
258 * in the prototype, each new instance of the class gets a reference
259 * to the same object.
260 *
261 * @static
262 * @property {Object}
263 */
264 astCache: {},
265
266 /**
267 * Where the magic happens.
268 * Parses a message from the key, and swaps in replacements as necessary, wraps in jQuery
269 * If an error is thrown, returns original key, and logs the error
270 *
271 * @param {string} key Message key.
272 * @param {Array} replacements Variable replacements for $1, $2... $n
273 * @return {jQuery}
274 */
275 parse: function ( key, replacements ) {
276 return this.emitter.emit( this.getAst( key ), replacements );
277 },
278
279 /**
280 * Fetch the message string associated with a key, return parsed structure. Memoized.
281 * Note that we pass '[' + key + ']' back for a missing message here.
282 *
283 * @param {string} key
284 * @return {string|Array} string of '[key]' if message missing, simple string if possible, array of arrays if needs parsing
285 */
286 getAst: function ( key ) {
287 var wikiText,
288 cacheKey = [ key, this.settings.onlyCurlyBraceTransform ].join( ':' );
289
290 if ( this.astCache[ cacheKey ] === undefined ) {
291 wikiText = this.settings.messages.get( key );
292 if ( typeof wikiText !== 'string' ) {
293 wikiText = '\\[' + key + '\\]';
294 }
295 this.astCache[ cacheKey ] = this.wikiTextToAst( wikiText );
296 }
297 return this.astCache[ cacheKey ];
298 },
299
300 /**
301 * Parses the input wikiText into an abstract syntax tree, essentially an s-expression.
302 *
303 * CAVEAT: This does not parse all wikitext. It could be more efficient, but it's pretty good already.
304 * n.b. We want to move this functionality to the server. Nothing here is required to be on the client.
305 *
306 * @param {string} input Message string wikitext
307 * @throws Error
308 * @return {Mixed} abstract syntax tree
309 */
310 wikiTextToAst: function ( input ) {
var
	// Indicates current position in input as we parse through it.
	// Shared among all parsing functions below.
	pos = 0,
	settings = this.settings,
	concat = Array.prototype.concat,
	// Single-character and literal parsers
	regularLiteral,
	regularLiteralWithoutBar,
	regularLiteralWithoutSpace,
	regularLiteralWithSquareBrackets,
	doubleQuote,
	singleQuote,
	backslash,
	anyCharacter,
	asciiAlphabetLiteral,
	escapedOrLiteralWithoutSpace,
	escapedOrLiteralWithoutBar,
	escapedOrRegularLiteral,
	whitespace,
	dollar,
	digits,
	// HTML parsers
	htmlDoubleQuoteAttributeValue,
	htmlSingleQuoteAttributeValue,
	htmlAttributeEquals,
	openHtmlStartTag,
	optionalForwardSlash,
	openHtmlEndTag,
	closeHtmlTag,
	// Link parsers
	openExtlink,
	closeExtlink,
	wikilinkPage,
	wikilinkContents,
	openWikilink,
	closeWikilink,
	// Template parsers
	templateName,
	pipe,
	colon,
	templateContents,
	openTemplate,
	closeTemplate,
	// Composite expressions and the final result
	nonWhitespaceExpression,
	paramExpression,
	expression,
	curlyBraceTransformExpression,
	result;
326
327 // =========================================================
328 // parsing combinators - could be a library on its own
329 // =========================================================
330
/**
 * Build a parser that tries the given parsers in order and returns the result of the
 * first one that does not return null. Returns null if none of them succeeds.
 *
 * @private
 * @param {Function[]} ps Parsers to try, in priority order
 * @return {Function} Combined parser
 * @return {Mixed|null} return.return Result of the first successful parser, or null
 */
function choice( ps ) {
	return function () {
		var parsed = null,
			index = 0;

		while ( parsed === null && index < ps.length ) {
			parsed = ps[ index ]();
			index++;
		}
		return parsed;
	};
}
350
/**
 * Run several parsers in a row; all must succeed.
 * On the first failure the shared position is rewound to where it was on entry and
 * null is returned.
 * This is the only eager combinator: it runs immediately rather than returning a parser.
 *
 * @private
 * @param {Function[]} ps Parsers to run, in order
 * @return {Array|null} One result per parser, or null if any of them failed
 */
function sequence( ps ) {
	var startPos = pos,
		collected = [],
		allMatched;

	allMatched = ps.every( function ( p ) {
		var parsed = p();
		if ( parsed === null ) {
			return false;
		}
		collected.push( parsed );
		return true;
	} );

	if ( !allMatched ) {
		pos = startPos;
		return null;
	}
	return collected;
}
373
/**
 * Build a parser that runs the same parser over and over until it fails.
 * It must succeed a minimum of n times; otherwise the shared position is rewound
 * and null is returned.
 *
 * @private
 * @param {number} n Minimum number of successful runs
 * @param {Function} p Parser to repeat
 * @return {Function} Repeating parser
 * @return {Array|null} return.return Results of all successful runs, or null
 */
function nOrMore( n, p ) {
	return function () {
		var parsed,
			startPos = pos,
			matches = [];

		for ( parsed = p(); parsed !== null; parsed = p() ) {
			matches.push( parsed );
		}

		if ( matches.length < n ) {
			pos = startPos;
			return null;
		}
		return matches;
	};
}
399
/**
 * There is a general pattern -- parse a thing, if that worked, apply transform, otherwise return null.
 *
 * TODO: But using this as a combinator seems to cause problems when combined with #nOrMore().
 * May be some scoping issue
 *
 * @private
 * @param {Function} p Parser to run
 * @param {Function} fn Applied to the parser's result when it is not null
 * @return {Function} Transforming parser
 * @return {Mixed|null} return.return fn( result ), or null if p failed
 */
function transform( p, fn ) {
	return function () {
		var parsed = p();

		if ( parsed === null ) {
			return null;
		}
		return fn( parsed );
	};
}
417
/**
 * Make a parser that matches one fixed string at the current position.
 * On a match the shared position advances past the string.
 *
 * @private
 * @param {string} s String to match
 * @return {Function}
 * @return {string|null} return.return The matched string, or null if input does not continue with it
 */
function makeStringParser( s ) {
	var len = s.length;

	return function () {
		if ( input.substr( pos, len ) !== s ) {
			return null;
		}
		pos += len;
		return s;
	};
}
437
/**
 * Makes a regex parser, given a RegExp object.
 * The regex is matched against the remainder of the input, so it should start with
 * a ^ to anchor it to the current position.
 * On a match the shared position advances by the length of the matched text.
 *
 * @private
 * @param {RegExp} regex anchored regex
 * @return {Function} function to parse input based on the regex
 * @return {string|null} return.return The matched text, or null if the regex does not match
 */
function makeRegexParser( regex ) {
	return function () {
		var text,
			remaining = input.slice( pos ),
			matches = remaining.match( regex );

		if ( matches !== null ) {
			text = matches[ 0 ];
			pos += text.length;
			return text;
		}
		return null;
	};
}
457
458 // ===================================================================
459 // General patterns above this line -- wikitext specific parsers below
460 // ===================================================================
461
462 // Parsing functions follow. All parsing functions work like this:
463 // They don't accept any arguments.
464 // Instead, they just operate non destructively on the string 'input'
465 // As they can consume parts of the string, they advance the shared variable pos,
466 // and return tokens (or whatever else they want to return).
467 // some things are defined as closures and other things as ordinary functions
468 // converting everything to a closure makes it a lot harder to debug... errors pop up
469 // but some debuggers can't tell you exactly where they come from. Also the mutually
470 // recursive functions seem not to work in all browsers then. (Tested IE6-7, Opera, Safari, FF)
471 // This may be because, to save code, memoization was removed
472
// Fixed single-character tokens.
backslash = makeStringParser( '\\' );
singleQuote = makeStringParser( '\'' );
doubleQuote = makeStringParser( '"' );
anyCharacter = makeRegexParser( /^./ );

// One literal character; the variants differ in which characters they refuse.
regularLiteral = makeRegexParser( /^[^{}\[\]$<\\]/ );
regularLiteralWithSquareBrackets = makeRegexParser( /^[^{}$\\]/ );
regularLiteralWithoutSpace = makeRegexParser( /^[^{}\[\]$\s]/ );
regularLiteralWithoutBar = makeRegexParser( /^[^{}\[\]$\\|]/ );

// Pieces of HTML start and end tags.
openHtmlStartTag = makeStringParser( '<' );
openHtmlEndTag = makeStringParser( '</' );
optionalForwardSlash = makeRegexParser( /^\/?/ );
htmlAttributeEquals = makeRegexParser( /^\s*=\s*/ );
closeHtmlTag = makeRegexParser( /^\s*>/ );
488
// A backslash followed by any character; yields the character without the backslash.
function escapedLiteral() {
	var pair = sequence( [ backslash, anyCharacter ] );

	if ( pair === null ) {
		return null;
	}
	return pair[ 1 ];
}

// Each of these accepts an escaped character first and otherwise falls back to the
// corresponding unescaped single-character literal.
escapedOrRegularLiteral = choice( [ escapedLiteral, regularLiteral ] );
escapedOrLiteralWithoutBar = choice( [ escapedLiteral, regularLiteralWithoutBar ] );
escapedOrLiteralWithoutSpace = choice( [ escapedLiteral, regularLiteralWithoutSpace ] );
// Runs the single-character parser one or more times and joins the characters into
// one string. Returns null if the parser does not match even once.
function joinOneOrMore( characterParser ) {
	var characters = nOrMore( 1, characterParser )();

	if ( characters === null ) {
		return null;
	}
	return characters.join( '' );
}

// Used to define "literals" without spaces, in space-delimited situations
function literalWithoutSpace() {
	return joinOneOrMore( escapedOrLiteralWithoutSpace );
}

// Used to define "literals" within template parameters. The pipe character is the
// parameter delimiter, so by default it is not a literal in the parameter
function literalWithoutBar() {
	return joinOneOrMore( escapedOrLiteralWithoutBar );
}

// Used for wikilink page names. Like literalWithoutBar, but
// without allowing escapes.
function unescapedLiteralWithoutBar() {
	return joinOneOrMore( regularLiteralWithoutBar );
}

// Plain text inside a regular expression; escapes are allowed.
function literal() {
	return joinOneOrMore( escapedOrRegularLiteral );
}

// Plain text when only {{-transformation is wanted; square brackets count as text.
function curlyBraceTransformExpressionLiteral() {
	return joinOneOrMore( regularLiteralWithSquareBrackets );
}
536
// Every regex handed to makeRegexParser() must be anchored with ^, because the parser
// advances pos by the length of the match and assumes the match starts at pos.
// Without the anchor, letters found further along in the input would be accepted here
// (e.g. the "b" of "<1b>" as a tag name) and pos would end up pointing at the wrong place.
asciiAlphabetLiteral = makeRegexParser( /^[A-Za-z]+/ );
// Attribute values: everything up to (not including) the closing quote; may be empty.
htmlDoubleQuoteAttributeValue = makeRegexParser( /^[^"]*/ );
htmlSingleQuoteAttributeValue = makeRegexParser( /^[^']*/ );

whitespace = makeRegexParser( /^\s+/ );
dollar = makeStringParser( '$' );
digits = makeRegexParser( /^\d+/ );
544
// Parses a $n placeholder into [ 'REPLACE', n - 1 ] (zero-based parameter index).
function replacement() {
	var tokens = sequence( [ dollar, digits ] );

	return tokens === null ?
		null :
		[ 'REPLACE', parseInt( tokens[ 1 ], 10 ) - 1 ];
}
openExtlink = makeStringParser( '[' );
closeExtlink = makeStringParser( ']' );

// Parses [target label] into [ 'EXTLINK', target, label ].
// This extlink MUST have inner contents, e.g. [foo] not allowed; [foo bar] [foo <i>bar</i>], etc. are allowed
function extlink() {
	var labelNodes, label,
		parsed = sequence( [
			openExtlink,
			nonWhitespaceExpression,
			whitespace,
			nOrMore( 1, expression ),
			closeExtlink
		] );

	if ( parsed === null ) {
		return null;
	}

	labelNodes = parsed[ 3 ];
	// TODO (mattflaschen, 2013-03-22): Clean this up if possible.
	// A single label node is passed through without CONCAT, so that it at least
	// doesn't get the htmlEmitter span.
	if ( labelNodes.length === 1 ) {
		label = labelNodes[ 0 ];
	} else {
		label = [ 'CONCAT' ].concat( labelNodes );
	}
	return [ 'EXTLINK', parsed[ 1 ], label ];
}
// Same as extlink, except that the URL is passed in as a parameter: [$n label]
// becomes [ 'EXTLINKPARAM', n - 1, label ].
function extLinkParam() {
	var tokens = sequence( [
		openExtlink,
		dollar,
		digits,
		whitespace,
		expression,
		closeExtlink
	] );

	return tokens === null ?
		null :
		[ 'EXTLINKPARAM', parseInt( tokens[ 2 ], 10 ) - 1, tokens[ 4 ] ];
}
pipe = makeStringParser( '|' );
openWikilink = makeStringParser( '[[' );
closeWikilink = makeStringParser( ']]' );

// Parses {{ ... }} and yields the parsed template contents without the braces.
function template() {
	var parts = sequence( [ openTemplate, templateContents, closeTemplate ] );

	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}

// The page part of a wikilink: plain text without escapes, or a template.
wikilinkPage = choice( [ unescapedLiteralWithoutBar, template ] );
612
// Parses "page|anchor" into [ page, anchor ].
function pipedWikilink() {
	var parts = sequence( [ wikilinkPage, pipe, expression ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}

// The piped form is tried first; otherwise the link is just a page (unpiped link).
wikilinkContents = choice( [ pipedWikilink, wikilinkPage ] );
626
// Parses [[ ... ]] into [ 'WIKILINK', page ] or [ 'WIKILINK', page, anchor ].
function wikilink() {
	var parsed = sequence( [ openWikilink, wikilinkContents, closeWikilink ] );

	if ( parsed === null ) {
		return null;
	}
	// concat() spreads a [ page, anchor ] pair and appends a lone page as is.
	return [ 'WIKILINK' ].concat( parsed[ 1 ] );
}
642
// TODO: Support data- if appropriate

// Parses quote, value, quote and yields the value without the quotes.
function quotedHtmlAttributeValue( quoteParser, valueParser ) {
	var parts = sequence( [ quoteParser, valueParser, quoteParser ] );

	if ( parts === null ) {
		return null;
	}
	return parts[ 1 ];
}

function doubleQuotedHtmlAttributeValue() {
	return quotedHtmlAttributeValue( doubleQuote, htmlDoubleQuoteAttributeValue );
}

function singleQuotedHtmlAttributeValue() {
	return quotedHtmlAttributeValue( singleQuote, htmlSingleQuoteAttributeValue );
}
661
// Parses one attribute (leading whitespace, name, equals sign, quoted value) into
// [ name, value ].
function htmlAttribute() {
	var quotedValue = choice( [
			doubleQuotedHtmlAttributeValue,
			singleQuotedHtmlAttributeValue
		] ),
		parts = sequence( [
			whitespace,
			asciiAlphabetLiteral,
			htmlAttributeEquals,
			quotedValue
		] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 1 ], parts[ 3 ] ];
}
674
/**
 * Checks if HTML is allowed.
 *
 * The element is allowed when the start and end tag names match (ignoring case), the
 * name is in settings.allowedHtmlElements, and every attribute name is listed either
 * in settings.allowedHtmlCommonAttributes or in the element's entry of
 * settings.allowedHtmlAttributesByElement. Attribute names are compared as written.
 *
 * @param {string} startTagName HTML start tag name
 * @param {string} endTagName HTML end tag name
 * @param {Object} attributes array of consecutive key value pairs,
 *  with index 2 * n being a name and 2 * n + 1 the associated value
 * @return {boolean} true if this HTML is allowed, false otherwise
 */
function isAllowedHtml( startTagName, endTagName, attributes ) {
	var index, name, isCommon, isElementSpecific,
		tagName = startTagName.toLowerCase(),
		closingName = endTagName.toLowerCase();

	if ( tagName !== closingName ) {
		return false;
	}
	if ( $.inArray( tagName, settings.allowedHtmlElements ) === -1 ) {
		return false;
	}

	// Names sit at the even indexes; the odd ones hold the values.
	for ( index = 0; index < attributes.length; index += 2 ) {
		name = attributes[ index ];
		isCommon = $.inArray( name, settings.allowedHtmlCommonAttributes ) !== -1;
		if ( !isCommon ) {
			isElementSpecific = $.inArray( name, settings.allowedHtmlAttributesByElement[ tagName ] || [] ) !== -1;
			if ( !isElementSpecific ) {
				return false;
			}
		}
	}

	return true;
}
703
// Parses zero or more attributes into the flat list
// [ 'HTMLATTRIBUTES', name1, value1, name2, value2, ... ].
function htmlAttributes() {
	var pairs = nOrMore( 0, htmlAttribute )();

	// Un-nest attributes array due to structure of jQueryMsg operations (see emit):
	// applying concat with the pairs as arguments flattens each [ name, value ] pair.
	return concat.apply( [ 'HTMLATTRIBUTES' ], pairs );
}
709
// Subset of allowed HTML markup.
// Most elements and many attributes allowed on the server are not supported yet.
//
// The element is parsed in three steps (start tag, contents, end tag) and the
// position around both tags is recorded, so that the exact source text of a tag can
// be emitted as plain text when the markup turns out not to be allowed.
function html() {
	var openTag, contents, closeTag, wrappedAttributes,
		openTagStart, openTagEnd, closeTagStart, closeTagEnd;

	// 1. Start tag, from "<" through ">"
	openTagStart = pos;
	openTag = sequence( [
		openHtmlStartTag,
		asciiAlphabetLiteral,
		htmlAttributes,
		optionalForwardSlash,
		closeHtmlTag
	] );
	if ( openTag === null ) {
		return null;
	}
	openTagEnd = pos;

	// 2. Contents
	contents = nOrMore( 0, expression )();

	// 3. End tag, from "</" through ">"
	closeTagStart = pos;
	closeTag = sequence( [
		openHtmlEndTag,
		asciiAlphabetLiteral,
		closeHtmlTag
	] );
	if ( closeTag === null ) {
		// Closing tag failed. Return the start tag (as text) and contents.
		return [ 'CONCAT', input.slice( openTagStart, openTagEnd ) ]
			.concat( contents );
	}
	closeTagEnd = pos;

	// First entry of the wrapped attributes is the 'HTMLATTRIBUTES' marker.
	wrappedAttributes = openTag[ 2 ];
	if ( isAllowedHtml( openTag[ 1 ], closeTag[ 1 ], wrappedAttributes.slice( 1 ) ) ) {
		return [ 'HTMLELEMENT', openTag[ 1 ], wrappedAttributes ]
			.concat( contents );
	}

	// HTML is not allowed, so contents will remain how it was, while HTML markup at
	// this level will be treated as text.
	// E.g. assuming script tags are not allowed:
	//
	// <script>[[Foo|bar]]</script>
	//
	// results in '&lt;script&gt;' and '&lt;/script&gt;' (not treated as an HTML tag),
	// surrounding a fully parsed HTML link.
	//
	// Concatenate everything from the tag, flattening the contents.
	return [ 'CONCAT', input.slice( openTagStart, openTagEnd ) ]
		.concat( contents, input.slice( closeTagStart, closeTagEnd ) );
}
781
// see $wgLegalTitleChars
// not allowing : due to the need to catch "PLURAL:$1"
templateName = transform(
	makeRegexParser( /^[ !"$&'()*,.\/0-9;=?@A-Z\^_`a-z~\x80-\xFF+\-]+/ ),
	function ( name ) {
		return name.toString();
	}
);

// Parses "|" followed by zero or more parameter expressions.
function templateParam() {
	var nodes,
		parsed = sequence( [ pipe, nOrMore( 0, paramExpression ) ] );

	if ( parsed === null ) {
		return null;
	}

	nodes = parsed[ 1 ];
	// Use a CONCAT operator if there are multiple nodes, otherwise return the first
	// node, raw (undefined for an empty parameter).
	if ( nodes.length > 1 ) {
		return [ 'CONCAT' ].concat( nodes );
	}
	return nodes[ 0 ];
}
801
// NAME:$n -> [ name, [ 'REPLACE', n - 1 ] ]
function templateWithReplacement() {
	var parts = sequence( [ templateName, colon, replacement ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}

// NAME:expression -> [ name, expression ]
function templateWithOutReplacement() {
	var parts = sequence( [ templateName, colon, paramExpression ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], parts[ 2 ] ];
}

// NAME: (nothing after the colon) -> [ name, '' ]
function templateWithOutFirstParameter() {
	var parts = sequence( [ templateName, colon ] );

	if ( parts === null ) {
		return null;
	}
	return [ parts[ 0 ], '' ];
}
colon = makeStringParser( ':' );
openTemplate = makeStringParser( '{{' );
closeTemplate = makeStringParser( '}}' );

// What may appear between {{ and }}. The form with a colon is tried first.
templateContents = choice( [
	function templateWithColon() {
		// templates can have placeholders for dynamic replacement eg: {{PLURAL:$1|one car|$1 cars}}
		// or no placeholders eg: {{GRAMMAR:genitive|{{SITENAME}}}
		var head = choice( [
				templateWithReplacement,
				templateWithOutReplacement,
				templateWithOutFirstParameter
			] ),
			parts = sequence( [ head, nOrMore( 0, templateParam ) ] );

		if ( parts === null ) {
			return null;
		}
		// [ name, firstParameter ] followed by the remaining parameters
		return parts[ 0 ].concat( parts[ 1 ] );
	},
	function templateWithoutColon() {
		var parts = sequence( [ templateName, nOrMore( 0, templateParam ) ] );

		if ( parts === null ) {
			return null;
		}
		// name followed by the parameters
		return [ parts[ 0 ] ].concat( parts[ 1 ] );
	}
] );
// The order inside each list is the order in which the alternatives are tried.

// Full expression: everything, including HTML.
expression = choice( [
	template,
	wikilink,
	extLinkParam,
	extlink,
	replacement,
	html,
	literal
] );

// Expression inside a template parameter: the pipe ends it, HTML is not parsed.
paramExpression = choice( [
	template,
	wikilink,
	extLinkParam,
	extlink,
	replacement,
	literalWithoutBar
] );

// Expression that ends at whitespace, such as the target of an external link.
nonWhitespaceExpression = choice( [
	template,
	wikilink,
	extLinkParam,
	extlink,
	replacement,
	literalWithoutSpace
] );

// Used when only {{-transformation is wanted, for 'text'
// or 'escaped' formats
curlyBraceTransformExpression = choice( [
	template,
	replacement,
	curlyBraceTransformExpressionLiteral
] );
883
/**
 * Starts the parse: applies the root expression as often as it matches and wraps
 * the resulting nodes in a CONCAT node.
 *
 * @param {Function} rootExpression root parse function
 * @return {Array|null} [ 'CONCAT', node, ... ], or null if the repetition yields null
 */
function start( rootExpression ) {
	var nodes = nOrMore( 0, rootExpression )();

	return nodes === null ? null : [ 'CONCAT' ].concat( nodes );
}
896 // everything above this point is supposed to be stateless/static, but
897 // I am deferring the work of turning it into prototypes & objects. It's quite fast enough
898 // finally let's do some actual work...
899
900 // If you add another possible rootExpression, you must update the astCache key scheme.
901 result = start( this.settings.onlyCurlyBraceTransform ? curlyBraceTransformExpression : expression );
902
903 /*
904 * For success, the p must have gotten to the end of the input
905 * and returned a non-null.
906 * n.b. This is part of language infrastructure, so we do not throw an internationalizable message.
907 */
908 if ( result === null || pos !== input.length ) {
909 throw new Error( 'Parse error at position ' + pos.toString() + ' in input: ' + input );
910 }
911 return result;
912 }
913
914 };
915
/**
 * htmlEmitter - object which primarily exists to emit HTML from parser ASTs
 *
 * Every magic word is installed on the instance as a method (named after the
 * lower-cased word) that returns the word's value.
 *
 * @param {Object} language Language helper object, stored as this.language
 * @param {Object} magic Map of magic word name to value
 */
mw.jqueryMsg.htmlEmitter = function ( language, magic ) {
	var emitter = this;

	this.language = language;

	$.each( magic, function ( word, value ) {
		emitter[ word.toLowerCase() ] = function () {
			return value;
		};
	} );

	/**
	 * (We put this method definition here, and not in prototype, to make sure it's not overwritten by any magic.)
	 * Walk entire node structure, applying replacements and template functions when appropriate
	 *
	 * @param {Mixed} node Abstract syntax tree (top node or subnode)
	 * @param {Array} replacements for $1, $2, ... $n
	 * @return {Mixed} single-string node or array of nodes suitable for jQuery appending
	 * @throws {Error} For an unknown operation or a node of an unexpected type
	 */
	this.emit = function ( node, replacements ) {
		var subnodes, operation,
			self = this,
			type = typeof node;

		if ( type === 'string' || type === 'number' ) {
			return node;
		}

		// typeof returns object for arrays
		if ( type === 'object' ) {
			// node is an array: the operation name followed by its operand nodes,
			// which are emitted first (depth-first).
			subnodes = $.map( node.slice( 1 ), function ( n ) {
				return self.emit( n, replacements );
			} );
			operation = node[ 0 ].toLowerCase();
			if ( typeof self[ operation ] !== 'function' ) {
				throw new Error( 'Unknown operation "' + operation + '"' );
			}
			return self[ operation ]( subnodes, replacements );
		}

		if ( type === 'undefined' ) {
			// Parsing the empty string (as an entire expression, or as a paramExpression in a template) results in undefined
			// Perhaps a more clever parser can detect this, and return the empty string? Or is that useful information?
			// The logical thing is probably to return the empty string here when we encounter undefined.
			return '';
		}

		throw new Error( 'Unexpected type in AST: ' + typeof node );
	};
};
969
970 // For everything in input that follows double-open-curly braces, there should be an equivalent parser
971 // function. For instance {{PLURAL ... }} will be processed by 'plural'.
972 // If you have 'magic words' then configure the parser to have them upon creation.
973 //
974 // An emitter method takes the parent node, the array of subnodes and the array of replacements (the values that $1, $2... should translate to).
975 // Note: all such functions must be pure, with the exception of referring to other pure functions via this.language (convertPlural and so on)
mw.jqueryMsg.htmlEmitter.prototype = {
	/**
	 * Parsing has been applied depth-first, so we can assume that all nodes here are single nodes.
	 * Must return a single node to parents -- a jQuery with synthetic span.
	 * However, unwrap any other synthetic spans in our children and pass them upwards.
	 *
	 * @param {Mixed[]} nodes Some single nodes, some arrays of nodes
	 * @return {jQuery} Span with class 'mediaWiki_htmlEmitter' holding the children
	 */
	concat: function ( nodes ) {
		var $span = $( '<span>' ).addClass( 'mediaWiki_htmlEmitter' );
		$.each( nodes, function ( i, node ) {
			if ( node instanceof jQuery && node.hasClass( 'mediaWiki_htmlEmitter' ) ) {
				// Child is itself a synthetic span: hoist its contents instead of nesting spans
				$.each( node.contents(), function ( j, childNode ) {
					appendWithoutParsing( $span, childNode );
				} );
			} else {
				// Let jQuery append nodes, arrays of nodes and jQuery objects
				// other things (strings, numbers, ..) are appended as text nodes (not as HTML strings)
				appendWithoutParsing( $span, node );
			}
		} );
		return $span;
	},

	/**
	 * Return replacement of correct index, or string if unavailable.
	 * Note that we expect the parsed parameter to be zero-based. i.e. $1 should have become [ 0 ].
	 * If the specified parameter is not found, return the same string
	 * (e.g. "$99" -> parameter 98 -> not found -> return "$99" ).
	 *
	 * TODO: Throw error if nodes.length > 1 ?
	 *
	 * @param {Array} nodes List of one element, integer, n >= 0
	 * @param {Array} replacements List of at least n strings
	 * @return {Mixed} The entry of replacements at that index, or the "$n" placeholder string
	 */
	replace: function ( nodes, replacements ) {
		var index = parseInt( nodes[ 0 ], 10 );

		if ( index < replacements.length ) {
			return replacements[ index ];
		} else {
			// index not found, fallback to displaying variable
			return '$' + ( index + 1 );
		}
	},

	/**
	 * Transform wiki-link
	 *
	 * TODO:
	 * It only handles basic cases, either no pipe, or a pipe with an explicit
	 * anchor.
	 *
	 * It does not attempt to handle features like the pipe trick.
	 * However, the pipe trick should usually not be present in wikitext retrieved
	 * from the server, since the replacement is done at save time.
	 * It may, though, if the wikitext appears in extension-controlled content.
	 *
	 * @param {String[]} nodes Page name, optionally followed by the anchor text
	 * @return {jQuery} Anchor element whose title is the page, href is
	 *  mw.util.getUrl( page ) and text is the anchor
	 */
	wikilink: function ( nodes ) {
		var page, anchor, url;

		page = nodes[ 0 ];
		url = mw.util.getUrl( page );

		if ( nodes.length === 1 ) {
			// [[Some Page]] or [[Namespace:Some Page]]
			anchor = page;
		} else {
			// [[Some Page|anchor text]] or [[Namespace:Some Page|anchor]]
			anchor = nodes[ 1 ];
		}

		return $( '<a>' ).attr( {
			title: page,
			href: url
		} ).text( anchor );
	},

	/**
	 * Converts array of HTML element key value pairs to object
	 *
	 * @param {Array} nodes Array of consecutive key value pairs, with index 2 * n being a
	 *  name and 2 * n + 1 the associated value
	 * @return {Object} Object mapping attribute name to attribute value
	 *  (values are passed through decodePrimaryHtmlEntities)
	 */
	htmlattributes: function ( nodes ) {
		var i, len, mapping = {};
		for ( i = 0, len = nodes.length; i < len; i += 2 ) {
			mapping[ nodes[ i ] ] = decodePrimaryHtmlEntities( nodes[ i + 1 ] );
		}
		return mapping;
	},

	/**
	 * Handles an (already-validated) HTML element.
	 *
	 * The input array is read, not modified: emitter methods are expected to be pure.
	 *
	 * @param {Array} nodes Nodes to process when creating element:
	 *  [ tagName, attributes, ...contents ]
	 * @return {Mixed} Result of appendWithoutParsing( $element, contents )
	 *  (presumably the created element wrapped in jQuery -- confirm against appendWithoutParsing)
	 */
	htmlelement: function ( nodes ) {
		var tagName = nodes[ 0 ],
			attributes = nodes[ 1 ],
			// Copy the tail rather than shift()-ing the caller's array
			contents = nodes.slice( 2 ),
			$element = $( document.createElement( tagName ) ).attr( attributes );

		return appendWithoutParsing( $element, contents );
	},

	/**
	 * Transform parsed structure into external link
	 * If the href is a jQuery object, treat it as "enclosing" the link text.
	 *
	 * - ... function, treat it as the click handler.
	 * - ... string, treat it as a URI.
	 *
	 * TODO: throw an error if nodes.length > 2 ?
	 *
	 * @param {Array} nodes List of two elements, {jQuery|Function|String} and {String}
	 * @return {jQuery}
	 */
	extlink: function ( nodes ) {
		var $el,
			arg = nodes[ 0 ],
			contents = nodes[ 1 ];
		if ( arg instanceof jQuery ) {
			$el = arg;
		} else {
			$el = $( '<a>' );
			if ( typeof arg === 'function' ) {
				// Dummy href plus preventDefault so the handler runs without navigating
				$el.attr( 'href', '#' )
					.click( function ( e ) {
						e.preventDefault();
					} )
					.click( arg );
			} else {
				$el.attr( 'href', arg.toString() );
			}
		}
		return appendWithoutParsing( $el, contents );
	},

	/**
	 * This is basically a combination of replace + external link (link with parameter
	 * as url), but we don't want to run the regular replace here: inserting a
	 * url as href-attribute of a link will automatically escape it already, so
	 * we don't want replace to (manually) escape it as well.
	 *
	 * TODO: throw error if nodes.length > 1 ?
	 *
	 * @param {Array} nodes List whose first element is an integer n >= 0 (the parameter
	 *  index) and whose second element is the link contents
	 * @param {Array} replacements List of at least n strings
	 * @return {jQuery} Result of extlink() for the looked-up replacement
	 */
	extlinkparam: function ( nodes, replacements ) {
		var replacement,
			index = parseInt( nodes[ 0 ], 10 );
		if ( index < replacements.length ) {
			replacement = replacements[ index ];
		} else {
			// Parameter not supplied: fall back to the literal "$n" placeholder
			replacement = '$' + ( index + 1 );
		}
		return this.extlink( [ replacement, nodes[ 1 ] ] );
	},

	/**
	 * Transform parsed structure into pluralization
	 * n.b. The first node may be a non-integer (for instance, a string representing an Arabic number).
	 * So convert it back with the current language's convertNumber.
	 *
	 * @param {Array} nodes List of nodes, [ {string|number}, {string}, {string} ... ]
	 * @return {string} selected pluralized form according to current language
	 */
	plural: function ( nodes ) {
		var forms, firstChild, firstChildText, explicitPluralFormNumber, formIndex, form, count,
			explicitPluralForms = {};

		count = parseFloat( this.language.convertNumber( nodes[ 0 ], true ) );
		forms = nodes.slice( 1 );
		for ( formIndex = 0; formIndex < forms.length; formIndex++ ) {
			form = forms[ formIndex ];

			if ( form.jquery && form.hasClass( 'mediaWiki_htmlEmitter' ) ) {
				// This is a nested node, may be an explicit plural form like 5=[$2 linktext]
				firstChild = form.contents().get( 0 );
				if ( firstChild && firstChild.nodeType === Node.TEXT_NODE ) {
					firstChildText = firstChild.textContent;
					if ( /^\d+=/.test( firstChildText ) ) {
						explicitPluralFormNumber = parseInt( firstChildText.split( /=/ )[ 0 ], 10 );
						// Use the digit part as key and rest of first text node and
						// rest of child nodes as value.
						firstChild.textContent = firstChildText.slice( firstChildText.indexOf( '=' ) + 1 );
						explicitPluralForms[ explicitPluralFormNumber ] = form;
						forms[ formIndex ] = undefined;
					}
				}
			} else if ( /^\d+=/.test( form ) ) {
				// Simple explicit plural forms like 12=a dozen
				explicitPluralFormNumber = parseInt( form.split( /=/ )[ 0 ], 10 );
				explicitPluralForms[ explicitPluralFormNumber ] = form.slice( form.indexOf( '=' ) + 1 );
				forms[ formIndex ] = undefined;
			}
		}

		// Remove explicit plural forms from the forms. They were set undefined in the above loop,
		// and $.map drops undefined results.
		forms = $.map( forms, function ( form ) {
			return form;
		} );

		return this.language.convertPlural( count, forms, explicitPluralForms );
	},

	/**
	 * Transform parsed structure according to gender.
	 *
	 * Usage: {{gender:[ mw.user object | '' | 'male' | 'female' | 'unknown' ] | masculine form | feminine form | neutral form}}.
	 *
	 * The first node must be one of:
	 * - the mw.user object (or a compatible one)
	 * - an empty string - indicating the current user, same effect as passing the mw.user object
	 * - a gender string ('male', 'female' or 'unknown')
	 *
	 * @param {Array} nodes List of nodes, [ {string|mw.user}, {string}, {string}, {string} ]
	 * @return {string} Selected gender form according to current language
	 */
	gender: function ( nodes ) {
		var gender,
			maybeUser = nodes[ 0 ],
			forms = nodes.slice( 1 );

		if ( maybeUser === '' ) {
			maybeUser = mw.user;
		}

		// If we are passed a mw.user-like object, check their gender.
		// Otherwise, assume the gender string itself was passed.
		if ( maybeUser && maybeUser.options instanceof mw.Map ) {
			gender = maybeUser.options.get( 'gender' );
		} else {
			gender = maybeUser;
		}

		return this.language.gender( gender, forms );
	},

	/**
	 * Transform parsed structure into grammar conversion.
	 * Invoked by putting `{{grammar:form|word}}` in a message
	 *
	 * @param {Array} nodes List of nodes [{Grammar case eg: genitive}, {string word}]
	 * @return {string} selected grammatical form according to current language; when the
	 *  word or the form is falsy, that falsy value is returned instead
	 */
	grammar: function ( nodes ) {
		var form = nodes[ 0 ],
			word = nodes[ 1 ];
		return word && form && this.language.convertGrammar( word, form );
	},

	/**
	 * Transform parsed structure into an int: (interface language) message include
	 * Invoked by putting `{{int:othermessage}}` into a message
	 *
	 * The first character of the message key is lower-cased before lookup.
	 *
	 * @param {Array} nodes List of nodes
	 * @return {string} Other message
	 */
	'int': function ( nodes ) {
		var msg = nodes[ 0 ];
		return mw.jqueryMsg.getMessageFunction()( msg.charAt( 0 ).toLowerCase() + msg.slice( 1 ) );
	},

	/**
	 * Takes an unformatted number (arab, no group separators and . as decimal separator)
	 * and outputs it in the localized digit script and formatted with decimal
	 * separator, according to the current language.
	 *
	 * @param {Array} nodes List of nodes: the number, optionally followed by the flag 'R'
	 * @return {number|string} Formatted number
	 */
	formatnum: function ( nodes ) {
		// The second argument handed to convertNumber is true only for the exact flag 'R'
		var isInteger = nodes[ 1 ] === 'R',
			number = nodes[ 0 ];

		return this.language.convertNumber( number, isInteger );
	}
};
1265
// Deprecated! Callers must not rely on window.gM being defined.
// window.gM ought not to be required at all - or, if it is, it should not be set up here.
// However, moving it to extensions breaks it (?!)
// The plugin needs to handle attributes as well before this can be removed.
// @deprecated since 1.23
mw.log.deprecate(
	window,
	'gM',
	mw.jqueryMsg.getMessageFunction(),
	'Use mw.message( ... ).parse() instead.'
);

/**
 * jQuery plugin entry point, as produced by mw.jqueryMsg.getPlugin().
 *
 * @method
 * @member jQuery
 * @see mw.jqueryMsg#getPlugin
 */
$.fn.msg = mw.jqueryMsg.getPlugin();
1279
// Swap the default message parser for one backed by jqueryMsg, keeping the
// previous implementation around as a fallback.
oldParser = mw.Message.prototype.parser;
mw.Message.prototype.parser = function () {
	var getMessage,
		// Matches "{{", "[", "<" or ">" anywhere in the message source
		wikitextPattern = /\{\{|[\[<>]/;

	// TODO: should we cache the message function so we don't create a new one every time? Benchmark this maybe?
	// Caching is somewhat problematic, because we do need different message functions for different maps, so
	// we'd have to cache the parser as a member of this.map, which sounds a bit ugly.

	// Only involve mw.jqueryMsg when the format is not 'plain' and the message
	// source matches the pattern above.
	if ( this.format !== 'plain' && wikitextPattern.test( this.map.get( this.key ) ) ) {
		getMessage = mw.jqueryMsg.getMessageFunction( {
			messages: this.map,
			// For format 'escaped', escaping part is handled by mediawiki.js
			format: this.format
		} );
		return getMessage( this.key, this.parameters );
	}

	// Otherwise fall back to mw.msg's simple parser
	return oldParser.apply( this );
};
1301
1302 }( mediaWiki, jQuery ) );