SECURITY: resources: Patch jQuery 3.3.1 for CVE-2019-11358
[lhc/web/wiklou.git] / resources / src / mediawiki.String.js
1 ( function () {
2
3 /**
4 * @class mw.String
5 * @singleton
6 */
7
/**
 * Calculate the number of bytes a string occupies when encoded as UTF-8.
 *
 * JavaScript strings are sequences of UTF-16 code units, so the size is
 * derived per code unit (or per surrogate pair):
 *
 * - U+0000..U+007F: 1 byte
 * - U+0080..U+07FF: 2 bytes
 * - U+0800..U+FFFF (outside the surrogate range): 3 bytes
 * - well-formed surrogate pair (one code point above U+FFFF): 4 bytes
 * - unpaired surrogate: 3 bytes, because it cannot be represented in UTF-8
 *   and encoders (e.g. TextEncoder) substitute U+FFFD for it
 *
 * See https://en.wikipedia.org/wiki/UTF-8#Description
 *
 * @author Jan Paul Posma, 2011
 * @author Timo Tijhof, 2012
 * @author David Chan, 2013
 *
 * @param {string} str
 * @return {number}
 */
function byteLength( str ) {
	// Each matched unit is swapped for as many ASCII asterisks as it needs bytes
	// in UTF-8; the length of the resulting string is then the byte count.
	// The asterisks are ASCII, so later passes never match them again.
	return str
		// Pairs go first, so that any surrogate still present afterwards is unpaired.
		.replace( /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, '****' )
		.replace( /[\u0080-\u07FF]/g, '**' )
		// Covers the regular 3-byte ranges as well as the leftover unpaired
		// surrogates (U+D800..U+DFFF), which are sized like U+FFFD.
		.replace( /[\u0800-\uFFFF]/g, '***' )
		.length;
}
38
/**
 * Count the characters (Unicode code points) in a string.
 *
 * A high surrogate immediately followed by a low surrogate encodes a single
 * code point, so every such pair contributes one instead of two. Unpaired
 * surrogates are counted individually.
 *
 * @param {string} str
 * @return {number}
 */
function codePointLength( str ) {
	// String#match yields null (not an empty array) when nothing matches.
	var pairs = str.match( /[\uD800-\uDBFF][\uDC00-\uDFFF]/g );

	// Every pair spans two UTF-16 code units but is only one code point.
	return str.length - ( pairs ? pairs.length : 0 );
}
51
/**
 * Like String#charAt, but yields both UTF-16 code units when the character at
 * the given position lies outside the BMP (i.e. is a surrogate pair).
 *
 * @private
 * @param {string} string
 * @param {number} offset Index of the first code unit of the character, or of
 *  its last code unit when `backwards` is set.
 * @param {boolean} backwards Whether `offset` points at the end of the character.
 * @return {string} The surrogate pair, or else whatever String#charAt gives for `offset`.
 */
function codePointAt( string, offset, backwards ) {
	var candidate;

	// No bounds checks needed at either end of the string: String#slice just
	// hands back a shorter (or empty) substring, which then fails the pair test.
	if ( backwards ) {
		candidate = string.slice( offset - 1, offset + 1 );
	} else {
		candidate = string.slice( offset, offset + 2 );
	}

	if ( !/^[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( candidate ) ) {
		return string.charAt( offset );
	}
	return candidate;
}
65
/**
 * Shorten a changed value so that it satisfies a length limit, by removing
 * characters from the end of the part that was inserted relative to the
 * previous value.
 *
 * @private
 * @param {string} safeVal Previous value, used to work out where the insertion happened.
 * @param {string} newVal New value that may be too long.
 * @param {number} length Maximum allowed result of `lengthFn`.
 * @param {Function} lengthFn Measures a string; it is only used for assessing
 *  the length and never alters the value itself.
 * @return {Object}
 * @return {string} return.newVal
 * @return {boolean} return.trimmed
 */
function trimLength( safeVal, newVal, length, lengthFn ) {
	var shared, prefixLen, suffixLen, fromOld, fromNew, head, middle, tail, result;

	// Within the limit: accept the new value untouched.
	if ( lengthFn( newVal ) <= length ) {
		return { newVal: newVal, trimmed: false };
	}

	// Over the limit: locate the inserted text by stripping the prefix and
	// suffix that both values have in common.
	//
	// Both scans together must stay within the length of the shorter string.
	// If the inserted text matches the end of the old value
	// (e.g. "foo" -> "foofoo"), the prefix scan consumes 3 units, and without
	// this cap the suffix scan would claim the same 3 units again.
	shared = Math.min( newVal.length, safeVal.length );

	// Common prefix comes first, so for "foo" -> "foofoo" the addition is
	// taken to be at the end. Stepping by whole code points keeps surrogate
	// pairs together.
	for ( prefixLen = 0; prefixLen < shared; prefixLen += fromOld.length ) {
		fromOld = codePointAt( safeVal, prefixLen, false );
		fromNew = codePointAt( newVal, prefixLen, false );
		if ( fromOld !== fromNew ) {
			break;
		}
	}

	// Common suffix, scanning from the end over whatever the prefix left over.
	suffixLen = 0;
	while ( suffixLen < shared - prefixLen ) {
		fromOld = codePointAt( safeVal, safeVal.length - 1 - suffixLen, true );
		fromNew = codePointAt( newVal, newVal.length - 1 - suffixLen, true );
		if ( fromOld !== fromNew ) {
			break;
		}
		suffixLen += fromOld.length;
	}

	// Unchanged start, inserted content, unchanged end.
	head = newVal.slice( 0, prefixLen );
	middle = newVal.slice( prefixLen, newVal.length - suffixLen );
	tail = newVal.slice( newVal.length - suffixLen );

	// Shorten the inserted content from its end until the limit is satisfied,
	// stopping once there is nothing left to remove (T43450).
	while ( lengthFn( head + middle + tail ) > length && middle.length > 0 ) {
		// Drop a trailing surrogate pair as a whole rather than splitting it.
		middle = middle.slice( 0, /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( middle ) ? -2 : -1 );
	}

	result = head + middle + tail;
	return {
		newVal: result,
		// A pathological lengthFn() that always reports a length above the
		// limit can leave the value untrimmed; report that honestly so that
		// callers do not end up in an infinite loop.
		trimmed: newVal !== result
	};
}
139
/**
 * Trim a string down to a byte limit, given a previous value that is known
 * to be safe.
 *
 * The insertion may be anywhere in the string: going from "foo" to "fobaro"
 * with a limit of 4 gives "fobo", not "foba". This emulates the native
 * maxlength behaviour by reconstructing where the insertion occurred.
 *
 * @param {string} safeVal Known value that was previously returned by this
 *  function, if none, pass empty string.
 * @param {string} newVal New value that may have to be trimmed down.
 * @param {number} byteLimit Number of bytes the value may be in size.
 * @param {Function} [filterFunction] Function to call on the string before assessing the length.
 * @return {Object}
 * @return {string} return.newVal
 * @return {boolean} return.trimmed
 */
function trimByteLength( safeVal, newVal, byteLimit, filterFunction ) {
	// The filter only takes part in measuring; it is not applied to the value
	// handed to trimLength.
	var measure = filterFunction ?
		function ( val ) {
			return byteLength( filterFunction( val ) );
		} :
		byteLength;

	return trimLength( safeVal, newVal, byteLimit, measure );
}
168
/**
 * Trim a string down to a limit expressed in characters (code points), given
 * a previous value that is known to be safe.
 *
 * The insertion may be anywhere in the string: going from "foo" to "fobaro"
 * with a limit of 4 gives "fobo", not "foba". This emulates the native
 * maxlength behaviour by reconstructing where the insertion occurred.
 *
 * @param {string} safeVal Known value that was previously returned by this
 *  function, if none, pass empty string.
 * @param {string} newVal New value that may have to be trimmed down.
 * @param {number} codePointLimit Number of characters the value may be in size.
 * @param {Function} [filterFunction] Function to call on the string before assessing the length.
 * @return {Object}
 * @return {string} return.newVal
 * @return {boolean} return.trimmed
 */
function trimCodePointLength( safeVal, newVal, codePointLimit, filterFunction ) {
	// The filter only takes part in measuring; it is not applied to the value
	// handed to trimLength.
	var measure = filterFunction ?
		function ( val ) {
			return codePointLength( filterFunction( val ) );
		} :
		codePointLength;

	return trimLength( safeVal, newVal, codePointLimit, measure );
}
197
// Public interface of this module. codePointAt() and trimLength() are not
// exported and stay private to the enclosing closure.
var api = {};
api.byteLength = byteLength;
api.codePointLength = codePointLength;
api.trimByteLength = trimByteLength;
api.trimCodePointLength = trimCodePointLength;
module.exports = api;
204
205 }() );