Merge "Remove references to deleted $this->debug in installer"
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index f9610fa..a84c4b8 100644 (file)
@@ -39,6 +39,7 @@ class LanguageConverter {
         */
        static public $languagesWithVariants = [
                'en',
+               'crh',
                'gan',
                'iu',
                'kk',
@@ -353,7 +354,6 @@ class LanguageConverter {
                if ( $this->guessVariant( $text, $toVariant ) ) {
                        return $text;
                }
-
                /* we convert everything except:
                   1. HTML markups (anything between < and >)
                   2. HTML entities
@@ -389,6 +389,7 @@ class LanguageConverter {
                // Guard against delimiter nulls in the input
                // (should never happen: see T159174)
                $text = str_replace( "\000", '', $text );
+               $text = str_replace( "\004", '', $text );
 
                $markupMatches = null;
                $elementMatches = null;
@@ -403,6 +404,13 @@ class LanguageConverter {
                                        // We hit the end.
                                        $elementPos = strlen( $text );
                                        $element = '';
+                               } elseif ( substr( $element, -1 ) === "\004" ) {
+                                       // This can sometimes happen if we have
+                                       // unclosed html tags (For example
+                                       // when converting a title attribute
+                                       // during a recursive call that contains
+                                       // a &lt; e.g. <div title="&lt;">.
+                                       $element = substr( $element, 0, -1 );
                                }
                        } else {
                                // If we hit here, then Language Converter could be tricked
@@ -412,11 +420,11 @@ class LanguageConverter {
                                $log = LoggerFactory::getInstance( 'languageconverter' );
                                $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
                                        . ". Disabling language conversion for this page.",
-                                       array(
+                                       [
                                                "method" => __METHOD__,
                                                "variant" => $toVariant,
                                                "startOfText" => substr( $text, 0, 500 )
-                                       )
+                                       ]
                                );
                                return $text;
                        }
@@ -430,7 +438,14 @@ class LanguageConverter {
                        if ( $element !== ''
                                && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
                        ) {
+                               // FIXME, this decodes entities, so if you have something
+                               // like <div title="foo&lt;bar"> the bar won't get
+                               // translated since after entity decoding it looks like
+                               // unclosed html and we call this method recursively
+                               // on attributes.
                                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+                               // Ensure self-closing tags stay self-closing.
+                               $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
                                $changed = false;
                                foreach ( [ 'title', 'alt' ] as $attrName ) {
                                        if ( !isset( $attrs[$attrName] ) ) {
@@ -449,7 +464,7 @@ class LanguageConverter {
                                }
                                if ( $changed ) {
                                        $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
-                                               $elementMatches[3];
+                                               $close . $elementMatches[3];
                                }
                        }
                        $literalBlob .= $element . "\000";
@@ -665,7 +680,9 @@ class LanguageConverter {
 
                $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
                $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
+               // @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
                $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
+               // @codingStandardsIgnoreEnd
                while ( $startPos < $length && $continue ) {
                        $continue = preg_match(
                                // Only match -{ outside of html.