Merge "SpecialMovepage: Convert form to use OOUI controls"
[lhc/web/wiklou.git] / includes / HtmlFormatter.php
index 77b8d35..221cefb 100644 (file)
@@ -63,7 +63,15 @@ class HtmlFormatter {
         */
        public function getDoc() {
                if ( !$this->doc ) {
-                       $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
+                       // DOMDocument::loadHTML apparently isn't very good with encodings, so
+                       // convert input to ASCII by encoding everything above 128 as entities.
+                       if ( function_exists( 'mb_convert_encoding' ) ) {
+                               $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' );
+                       } else {
+                               $html = preg_replace_callback( '/[\x{80}-\x{10ffff}]/u', function ( $m ) {
+                                       return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';';
+                               }, $this->html );
+                       }
 
                        // Workaround for bug that caused spaces before references
                        // to disappear during processing:
@@ -244,7 +252,14 @@ class HtmlFormatter {
                        ) );
                }
                $html = $replacements->replace( $html );
-               $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
+
+               if ( function_exists( 'mb_convert_encoding' ) ) {
+                       // Just in case the conversion in getDoc() above used named
+                       // entities that aren't known to html_entity_decode().
+                       $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
+               } else {
+                       $html = html_entity_decode( $html, ENT_COMPAT, 'utf-8' );
+               }
                return $html;
        }
 
@@ -261,7 +276,6 @@ class HtmlFormatter {
        public function getText( $element = null ) {
 
                if ( $this->doc ) {
-                       wfProfileIn( __METHOD__ . '-dom' );
                        if ( $element !== null && !( $element instanceof DOMElement ) ) {
                                $element = $this->doc->getElementById( $element );
                        }
@@ -277,9 +291,7 @@ class HtmlFormatter {
                                $body->appendChild( $element );
                        }
                        $html = $this->doc->saveHTML();
-                       wfProfileOut( __METHOD__ . '-dom' );
 
-                       wfProfileIn( __METHOD__ . '-fixes' );
                        $html = $this->fixLibXml( $html );
                        if ( wfIsWindows() ) {
                                // Cleanup for CRLF misprocessing of unknown origin on Windows.
@@ -288,7 +300,6 @@ class HtmlFormatter {
                                // XML code paths if possible and fix there.
                                $html = str_replace( '
', '', $html );
                        }
-                       wfProfileOut( __METHOD__ . '-fixes' );
                } else {
                        $html = $this->html;
                }
@@ -296,12 +307,10 @@ class HtmlFormatter {
                $html = preg_replace( '/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
                $html = $this->onHtmlReady( $html );
 
-               wfProfileIn( __METHOD__ . '-flatten' );
                if ( $this->elementsToFlatten ) {
                        $elements = implode( '|', $this->elementsToFlatten );
                        $html = preg_replace( "#</?($elements)\\b[^>]*>#is", '', $html );
                }
-               wfProfileOut( __METHOD__ . '-flatten' );
 
                return $html;
        }