+ // DOMDocument::loadHTML() does not reliably detect the input encoding, so
+ // hand it pure ASCII: every code point at or above U+0080 is rewritten as a
+ // decimal numeric character reference (e.g. "é" becomes "&#233;").
+ if ( function_exists( 'mb_encode_numericentity' ) ) {
+     // Conversion map is ( first code point, last code point, offset, mask ):
+     // encode U+0080..U+10FFFF unchanged. This replaces the 'HTML-ENTITIES'
+     // pseudo-encoding of mb_convert_encoding(), deprecated as of PHP 8.2.
+     $html = mb_encode_numericentity(
+         $this->html,
+         array( 0x80, 0x10FFFF, 0, 0x1FFFFF ),
+         'UTF-8'
+     );
+ } else {
+     // Fallback for installations without the mbstring extension.
+     $html = preg_replace_callback( '/[\x{80}-\x{10ffff}]/u', function ( $m ) {
+         return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';';
+     }, $this->html );
+     if ( $html === null ) {
+         // preg_replace_callback() returns null on a PCRE error, which with
+         // the /u flag includes a subject that is not valid UTF-8. Keep the
+         // raw input rather than handing null to the parser.
+         $html = $this->html;
+     }
+ }