jquery.byteLimit: Handle characters outside BMP (surrogate pairs) when trimming
[lhc/web/wiklou.git] / resources / src / jquery / jquery.byteLimit.js
1 /**
2 * @class jQuery.plugin.byteLimit
3 */
4 ( function ( $ ) {
5
// Space-separated list of the DOM events on which the byte limit is re-checked.
// Every entry carries the ".byteLimit" namespace so that all of them can be
// unbound together via .off( '.byteLimit' ).
var eventKeys = [
	'keyup',
	'keydown',
	'change',
	'mouseup',
	'cut',
	'paste',
	'focus',
	'blur'
].map( function ( type ) {
	return type + '.byteLimit';
} ).join( ' ' );
16
/**
 * Like String#charAt, but yields both UTF-16 code units when the character at
 * the given position is encoded as a surrogate pair (i.e. lies outside the BMP).
 *
 * @private
 * @param {string} string String to read from
 * @param {number} offset Index of a UTF-16 code unit in `string`
 * @param {boolean} backwards If true, `offset` is taken to be the *second* unit of a
 *  potential pair (the pair is looked for at offset-1..offset); otherwise it is
 *  taken to be the *first* unit (the pair is looked for at offset..offset+1).
 * @return {string} The two-unit surrogate pair if one is found at that position,
 *  otherwise whatever `string.charAt( offset )` returns.
 */
function codePointAt( string, offset, backwards ) {
	// No bounds checks are needed here: String#slice just returns a shorter
	// (or empty) substring near the edges, which then fails the pair test.
	var from = backwards ? offset - 1 : offset,
		candidate = string.slice( from, from + 2 );

	return /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( candidate ) ?
		candidate :
		string.charAt( offset );
}
30
/**
 * Utility function to trim down a string, based on byteLimit
 * and given a safe start position. It supports insertion anywhere
 * in the string, so "foo" to "fobaro" if limit is 4 will result in
 * "fobo", not "foba". Basically emulating the native maxlength by
 * reconstructing where the insertion occurred.
 *
 * Only the inserted part is ever shortened. If the unchanged start and end
 * alone already exceed the limit (e.g. because `safeVal` itself was too long),
 * the inserted part is removed entirely and the remainder is returned as is.
 *
 * @static
 * @param {string} safeVal Known value that was previously returned by this
 *  function, if none, pass empty string.
 * @param {string} newVal New value that may have to be trimmed down.
 * @param {number} byteLimit Number of bytes the value may be in size.
 * @param {Function} [fn] See jQuery#byteLimit.
 * @return {Object}
 * @return {string} return.newVal
 * @return {boolean} return.trimmed
 */
$.trimByteLength = function ( safeVal, newVal, byteLimit, fn ) {
	var startMatches, endMatches, matchesLen, inpParts, chopOff, oldChar, newChar, result,
		oldVal = safeVal;

	// Run the hook if one was provided, but only on the length
	// assessment. The value itself is not to be affected by the hook.
	function measure( value ) {
		return $.byteLength( fn ? fn( value ) : value );
	}

	if ( measure( newVal ) <= byteLimit ) {
		// Limit was not reached, just remember the new value
		// and let the user continue.
		return {
			newVal: newVal,
			trimmed: false
		};
	}

	// Current input is longer than the active limit.
	// Figure out what was added and limit the addition.
	startMatches = 0;
	endMatches = 0;

	// It is important that we keep the search within the range of
	// the shortest string's length.
	// Imagine a user adds text that matches the end of the old value
	// (e.g. "foo" -> "foofoo"). startMatches would be 3, but without
	// limiting both searches to the shortest length, endMatches would
	// also be 3.
	matchesLen = Math.min( newVal.length, oldVal.length );

	// Count same characters from the left, first.
	// (if "foo" -> "foofoo", assume addition was at the end).
	// Surrogate pairs are compared (and counted) as a unit.
	while ( startMatches < matchesLen ) {
		oldChar = codePointAt( oldVal, startMatches, false );
		newChar = codePointAt( newVal, startMatches, false );
		if ( oldChar !== newChar ) {
			break;
		}
		startMatches += oldChar.length;
	}

	// Then count same characters from the right, without re-using anything
	// that was already matched from the left.
	while ( endMatches < ( matchesLen - startMatches ) ) {
		oldChar = codePointAt( oldVal, oldVal.length - 1 - endMatches, true );
		newChar = codePointAt( newVal, newVal.length - 1 - endMatches, true );
		if ( oldChar !== newChar ) {
			break;
		}
		endMatches += oldChar.length;
	}

	inpParts = [
		// Same start
		newVal.slice( 0, startMatches ),
		// Inserted content
		newVal.slice( startMatches, newVal.length - endMatches ),
		// Same end
		newVal.slice( newVal.length - endMatches )
	];

	// Chop off characters from the end of the "inserted content" string
	// until the limit is satisfied.
	// Stop when there is nothing left to slice (T43450). This guard is needed
	// with and without a hook: without it, a start + end that is over the limit
	// on its own would keep this loop spinning forever.
	while ( measure( inpParts.join( '' ) ) > byteLimit && inpParts[ 1 ].length > 0 ) {
		// Do not chop off halves of surrogate pairs
		chopOff = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( inpParts[ 1 ] ) ? 2 : 1;
		inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
	}

	result = inpParts.join( '' );

	return {
		newVal: result,
		// If nothing could be chopped off (pathological fn() that always reports a
		// value longer than the limit, or an over-long unchanged part), we might have
		// ended up not trimming - report that honestly so callers do not loop.
		trimmed: newVal !== result
	};
};
129
/**
 * Enforces a byte limit on an input field, so that multi-byte (UTF-8) input is
 * accounted for when, for example, a database field is limited in bytes rather
 * than in characters. Browsers natively only offer maxlength, which limits the
 * number of characters; this plugin limits the number of bytes instead.
 *
 * May be called with a custom limit (used instead of the maxlength attribute),
 * with a filter function (when the limit should apply to something other than
 * the exact input value), or with both. The order of the parameters matters!
 *
 * @param {number} [limit] Limit to enforce. Falls back to the element's
 *  maxlength attribute when omitted.
 * @param {Function} [fn] Function to call on the string before assessing the
 *  length; called with the current value as argument.
 * @return {jQuery}
 * @chainable
 */
$.fn.byteLimit = function ( limit, fn ) {
	// Sort out the overloaded signature. byteLimit( fn ) means "no explicit limit",
	// and a second argument is then ignored. In every other case make sure fn is
	// either a real function or undefined, so a plain truthiness check suffices below.
	if ( $.isFunction( limit ) ) {
		fn = limit;
		limit = undefined;
	} else if ( !$.isFunction( fn ) ) {
		fn = undefined;
	}

	// Everything from here on is specific to each element in the collection.
	return this.each( function ( i, el ) {
		var $field = $( el ),
			// Last value known to be within the limit. Lets the handler below work out
			// what changed between the previous state and the current one, so that the
			// limit is enforced appropriately (e.g. text inserted at the beginning is
			// not "fixed" by chopping from the end).
			lastSafeValue = '',
			// Without an explicit limit, use the maxlength value. This needs its own
			// variable: overwriting 'limit' from the outer scope would leak into the
			// next each() iteration.
			// The attribute is read rather than the property because Chrome's maxLength
			// property by default reports the highest supported value, giving no hint
			// whether a limit was actually requested. We only want to bind all of this
			// when a limit was explicitly set in the HTML.
			// Cast to a primitive number: the attribute is a string, and the limit
			// parameter could in theory be something else as well.
			maxBytes = Number( limit === undefined ? $field.attr( 'maxlength' ) : limit );

		// Nothing to do without a valid limit. The negative check exists for Firefox,
		// which reports -1 (rather than undefined) for an unset maxLength.
		if ( !maxBytes || maxBytes < 0 ) {
			return;
		}

		// Remove handlers left over from an earlier byteLimit() call, if any.
		$field.off( '.byteLimit' );

		if ( fn ) {
			// Keep the callback around for reference.
			$field.data( 'byteLimit.callback', fn );

			// Switch off the native maxLength (if any), as it interferes with the
			// differently calculated byte limit: besides fitting fewer characters on
			// average, the callback may permit longer values (e.g. count only "Foo"
			// out of "User:Foo").
			// maxLength is an odd property. Removing it or setting it to undefined
			// directly has no effect; the browser only unsets it internally once the
			// associated attribute is removed (Firefox/Chrome).
			// https://bugs.chromium.org/p/chromium/issues/detail?id=136004
			$field.removeAttr( 'maxlength' );
		} else {
			// Without a callback the byte limit can never allow more than the character
			// limit (no character is smaller than 1 byte). So (re-)enforce the native
			// limit where possible; that leaves less for the trimming loop to iterate
			// over.
			$field.attr( 'maxlength', maxBytes );
		}

		// We act after the change has happened. Guessing the new value up front and
		// cancelling the event does not work, since the new value is not always
		// oldValue + String.fromCharCode( e.which ): think of cut, paste, select-drag
		// replacements, custom input methods and so on.
		// Although input is only ever trimmed afterwards (never prevented), we still
		// listen to the events that enter text, because the value can change before
		// keyup/change fire (holding a key down fires keydown repeatedly, and on each
		// keydown the result of the previous one can be trimmed).
		// See https://www.w3.org/TR/DOM-Level-3-Events/#events-keyboard-event-order for
		// the order and characteristics of the key events.
		$field.on( eventKeys, function () {
			var outcome = $.trimByteLength( lastSafeValue, this.value, maxBytes, fn );

			// Assign the value property only when something was trimmed. Every
			// assignment makes the browser re-initiate the text context, which moves
			// the cursor to the end of the input, away from where it was. That is a
			// side effect of limiting after the fact.
			if ( outcome.trimmed === true ) {
				this.value = outcome.newVal;
				// Tell other scripts attached to this node that the value changed.
				// This re-enters this very handler too, which is then a harmless no-op.
				$field.trigger( 'change' );
			}

			// Always keep the remembered value in sync with the input. Otherwise
			// trimByteLength would compare the new value against an empty string
			// rather than the old value, and always trim from the end (T42850).
			lastSafeValue = outcome.newVal;
		} );
	} );
};
255
256 /**
257 * @class jQuery
258 * @mixins jQuery.plugin.byteLimit
259 */
260 }( jQuery ) );