/**
 * Calculate the byte length of a string (accounting for UTF-8).
 *
 * @author Jan Paul Posma, 2011
 * @author Timo Tijhof, 2012
 * @author David Chan, 2013
 *
 * @param {string} str String to measure.
 * @return {number} Number of bytes `str` occupies when encoded as UTF-8.
 */
function byteLength( str ) {
	// JavaScript strings are sequences of UTF-16 code units. To find the UTF-8 size,
	// every code unit that needs N bytes is replaced by N one-byte placeholders ('*')
	// and the length of the resulting string is taken.
	//
	// Each surrogate (\uD800-\uDFFF) is counted as 2 bytes: a well-formed character
	// outside the BMP consists of two surrogates and takes 4 bytes in UTF-8 (2*2=4).
	// Illegal sequences (lone surrogates) may therefore be miscounted.
	//
	// https://en.wikipedia.org/wiki/UTF-8#Description
	// The mapping from UTF-16 code units to UTF-8 bytes is as follows:
	// > Range 0000-007F: codepoints that become 1 byte of UTF-8
	// > Range 0080-07FF: codepoints that become 2 bytes of UTF-8
	// > Range 0800-D7FF: codepoints that become 3 bytes of UTF-8
	// > Range D800-DFFF: Surrogates (each pair becomes 4 bytes of UTF-8)
	// > Range E000-FFFF: codepoints that become 3 bytes of UTF-8 (continued)
	return str
		.replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
		.replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
		.length;
}
/**
 * Like String#charAt, but return the pair of UTF-16 surrogates for characters
 * outside of the BMP.
 *
 * @private
 * @param {string} string String to read from.
 * @param {number} offset Index of a UTF-16 code unit in `string`. When reading forwards
 *  this is the first unit of the character; when reading backwards it is the last unit.
 * @param {boolean} backwards Whether `offset` points at the end of the character
 *  (look at the preceding unit for a pair) rather than at its start.
 * @return {string} The surrogate pair (2 code units) if one is found at that position,
 *  otherwise the single code unit at `offset` (empty string if out of range).
 */
function codePointAt( string, offset, backwards ) {
	// We don't need to check for offsets at the beginning or end of string,
	// String#slice will simply return a shorter (or empty) substring.
	var maybePair = backwards ?
		string.slice( offset - 1, offset + 1 ) :
		string.slice( offset, offset + 2 );

	// Only a high surrogate immediately followed by a low surrogate is a valid pair.
	if ( /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( maybePair ) ) {
		return maybePair;
	}
	return string.charAt( offset );
}
/**
 * Utility function to trim down a string, based on byteLimit
 * and given a safe start position. It supports insertion anywhere
 * in the string, so "foo" to "fobaro" if limit is 4 will result in
 * "fobo", not "foba". Basically emulating the native maxlength by
 * reconstructing where the insertion occurred.
 *
 * @param {string} safeVal Known value that was previously returned by this
 *  function, if none, pass empty string.
 * @param {string} newVal New value that may have to be trimmed down.
 * @param {number} byteLimit Number of bytes the value may be in size.
 * @param {Function} [fn] Function to call on the string before assessing the length.
 * @return {Object}
 * @return {string} return.newVal
 * @return {boolean} return.trimmed
 */
function trimByteLength( safeVal, newVal, byteLimit, fn ) {
	var startMatches, endMatches, matchesLen, inpParts, chopOff, oldChar, newChar, joined,
		oldVal = safeVal,
		trailingPair = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/,
		// Run the hook if one was provided, but only on the length
		// assessment. The value itself is not to be affected by the hook.
		measure = fn ?
			function ( value ) {
				return byteLength( fn( value ) );
			} :
			byteLength;

	if ( measure( newVal ) <= byteLimit ) {
		// Limit was not reached, just remember the new value
		// and let the user continue.
		return {
			newVal: newVal,
			trimmed: false
		};
	}

	// Current input is longer than the active limit.
	// Figure out what was added and limit the addition.
	startMatches = 0;
	endMatches = 0;

	// It is important that we keep the search within the range of
	// the shortest string's length.
	// Imagine a user adds text that matches the end of the old value
	// (e.g. "foo" -> "foofoo"). startMatches would be 3, but without
	// limiting both searches to the shortest length, endMatches would
	// also be 3.
	matchesLen = Math.min( newVal.length, oldVal.length );

	// Count same characters from the left, first.
	// (if "foo" -> "foofoo", assume addition was at the end).
	while ( startMatches < matchesLen ) {
		oldChar = codePointAt( oldVal, startMatches, false );
		newChar = codePointAt( newVal, startMatches, false );
		if ( oldChar !== newChar ) {
			break;
		}
		startMatches += oldChar.length;
	}

	// Then count same characters from the right, without overlapping the
	// part already matched from the left.
	while ( endMatches < ( matchesLen - startMatches ) ) {
		oldChar = codePointAt( oldVal, oldVal.length - 1 - endMatches, true );
		newChar = codePointAt( newVal, newVal.length - 1 - endMatches, true );
		if ( oldChar !== newChar ) {
			break;
		}
		endMatches += oldChar.length;
	}

	inpParts = [
		// Same start
		newVal.slice( 0, startMatches ),
		// Inserted content
		newVal.slice( startMatches, newVal.length - endMatches ),
		// Same end
		newVal.slice( newVal.length - endMatches )
	];

	// Chop off characters from the end of the "inserted content" string
	// until the limit is satisfied.
	// Stop when there is nothing left to slice (T43450). This guard applies with and
	// without fn: if the unchanged start/end parts alone exceed the limit (e.g. safeVal
	// was already too long), chopping an empty string would otherwise loop forever.
	while ( measure( inpParts.join( '' ) ) > byteLimit && inpParts[ 1 ].length > 0 ) {
		// Do not chop off halves of surrogate pairs
		chopOff = trailingPair.test( inpParts[ 1 ] ) ? 2 : 1;
		inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
	}

	joined = inpParts.join( '' );
	return {
		newVal: joined,
		// For pathological fn() that always returns a value longer than the limit, we might have
		// ended up not trimming - report that honestly so callers do not loop on it.
		trimmed: newVal !== joined
	};
}
// NOTE(review): the opening line of this object literal is not visible in this excerpt;
// presumably it is the module's export map exposing byteLength and trimByteLength
// (codePointAt is not listed, so it appears to stay private) - confirm against the full file.
152 byteLength
: byteLength
,
153 trimByteLength
: trimByteLength