+ }
+
+ /**
+ * Remove bytes that represent an incomplete Unicode character
+ * at the end of string (e.g. bytes of the char are missing)
+ *
+ * @param $string String
+ * @return string
+ */
+ protected function removeBadCharLast( $string ) {
+ $char = ord( $string[strlen( $string ) - 1] );
+ $m = array();
+ if ( $char >= 0xc0 ) {
+ # We got the first byte only of a multibyte char; remove it.
+ $string = substr( $string, 0, -1 );
+ } elseif ( $char >= 0x80 &&
+ preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
+ '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
+ {
+ # We chopped in the middle of a character; remove it
+ $string = $m[1];
+ }
+ return $string;
+ }
+
+ /**
+ * Remove bytes that represent an incomplete Unicode character
+ * at the start of string (e.g. bytes of the char are missing)
+ *
+ * @param $string String
+ * @return string
+ */
+ protected function removeBadCharFirst( $string ) {
+ $char = ord( $string[0] );
+ if ( $char >= 0x80 && $char < 0xc0 ) {
+ # We chopped in the middle of a character; remove the whole thing
+ $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
+ }
+ return $string;
+ }
+
+ /*
+ * Truncate a string of valid HTML to a specified length in bytes,
+ * appending an optional string (e.g. for ellipses), and return valid HTML
+ *
+ * This is only intended for styled/linked text, such as HTML with
+ * tags like <span> and <a>, were the tags are self-contained (valid HTML)
+ *
+ * Note: tries to fix broken HTML with MWTidy
+ *
+ * @param string $text String to truncate
+ * @param int $length (zero/positive) Maximum length (excluding ellipses)
+ * @param string $ellipsis String to append to the truncated text
+ * @returns string
+ */
+ function truncateHtml( $text, $length, $ellipsis = '...' ) {
+ # Use the localized ellipsis character
+ if ( $ellipsis == '...' ) {
+ $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
+ }
+ # Check if there is no need to truncate
+ if ( $length <= 0 ) {
+ return $ellipsis; // no text shown, nothing to format
+ } elseif ( strlen( $text ) <= $length ) {
+ return $text; // string short enough even *with* HTML
+ }
+ $text = MWTidy::tidy( $text ); // fix tags
+ $displayLen = 0; // innerHTML legth so far
+ $testingEllipsis = false; // checking if ellipses will make string longer/equal?
+ $tagType = 0; // 0-open, 1-close
+ $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
+ $entityState = 0; // 0-not entity, 1-entity
+ $tag = $ret = $ch = '';
+ $openTags = array();
+ $textLen = strlen( $text );
+ for ( $pos = 0; $pos < $textLen; ++$pos ) {
+ $ch = $text[$pos];
+ $lastCh = $pos ? $text[$pos - 1] : '';
+ $ret .= $ch; // add to result string
+ if ( $ch == '<' ) {
+ $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
+ $entityState = 0; // for bad HTML
+ $bracketState = 1; // tag started (checking for backslash)
+ } elseif ( $ch == '>' ) {
+ $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
+ $entityState = 0; // for bad HTML
+ $bracketState = 0; // out of brackets
+ } elseif ( $bracketState == 1 ) {
+ if ( $ch == '/' ) {
+ $tagType = 1; // close tag (e.g. "</span>")
+ } else {
+ $tagType = 0; // open tag (e.g. "<span>")
+ $tag .= $ch;
+ }
+ $bracketState = 2; // building tag name
+ } elseif ( $bracketState == 2 ) {
+ if ( $ch != ' ' ) {
+ $tag .= $ch;
+ } else {
+ // Name found (e.g. "<a href=..."), add on tag attributes...
+ $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
+ }
+ } elseif ( $bracketState == 0 ) {
+ if ( $entityState ) {
+ if ( $ch == ';' ) {
+ $entityState = 0;
+ $displayLen++; // entity is one displayed char
+ }
+ } else {
+ if ( $ch == '&' ) {
+ $entityState = 1; // entity found, (e.g. " ")
+ } else {
+ $displayLen++; // this char is displayed
+ // Add on the other display text after this...
+ $skipped = $this->truncate_skip(
+ $ret, $text, "<>&", $pos + 1, $length - $displayLen );
+ $displayLen += $skipped;
+ $pos += $skipped;
+ }
+ }