Merge "Call Database::timestamp in some tests"
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index f9610fa..d11838a 100644 (file)
@@ -39,6 +39,7 @@ class LanguageConverter {
         */
        static public $languagesWithVariants = [
                'en',
+               'crh',
                'gan',
                'iu',
                'kk',
@@ -162,6 +163,8 @@ class LanguageConverter {
 
                $req = $this->getURLVariant();
 
+               Hooks::run( 'GetLangPreferredVariant', [ &$req ] );
+
                if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
                        $req = $this->getUserVariant();
                } elseif ( !$req ) {
@@ -353,7 +356,6 @@ class LanguageConverter {
                if ( $this->guessVariant( $text, $toVariant ) ) {
                        return $text;
                }
-
                /* we convert everything except:
                   1. HTML markups (anything between < and >)
                   2. HTML entities
@@ -389,6 +391,7 @@ class LanguageConverter {
                // Guard against delimiter nulls in the input
                // (should never happen: see T159174)
                $text = str_replace( "\000", '', $text );
+               $text = str_replace( "\004", '', $text );
 
                $markupMatches = null;
                $elementMatches = null;
@@ -403,6 +406,13 @@ class LanguageConverter {
                                        // We hit the end.
                                        $elementPos = strlen( $text );
                                        $element = '';
+                               } elseif ( substr( $element, -1 ) === "\004" ) {
+                                       // This can sometimes happen if we have
+                                       // unclosed HTML tags, for example when
+                                       // converting, during a recursive call,
+                                       // a title attribute that contains
+                                       // a &lt;, e.g. <div title="&lt;">.
+                                       $element = substr( $element, 0, -1 );
                                }
                        } else {
                                // If we hit here, then Language Converter could be tricked
@@ -412,11 +422,11 @@ class LanguageConverter {
                                $log = LoggerFactory::getInstance( 'languageconverter' );
                                $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
                                        . ". Disabling language conversion for this page.",
-                                       array(
+                                       [
                                                "method" => __METHOD__,
                                                "variant" => $toVariant,
                                                "startOfText" => substr( $text, 0, 500 )
-                                       )
+                                       ]
                                );
                                return $text;
                        }
@@ -430,7 +440,14 @@ class LanguageConverter {
                        if ( $element !== ''
                                && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
                        ) {
+                               // FIXME: this decodes entities, so if you have something
+                               // like <div title="foo&lt;bar">, the "bar" won't get
+                               // translated: after entity decoding it looks like
+                               // unclosed HTML, and we call this method recursively
+                               // on attributes.
                                $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+                               // Ensure self-closing tags stay self-closing.
+                               $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
                                $changed = false;
                                foreach ( [ 'title', 'alt' ] as $attrName ) {
                                        if ( !isset( $attrs[$attrName] ) ) {
@@ -449,7 +466,7 @@ class LanguageConverter {
                                }
                                if ( $changed ) {
                                        $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
-                                               $elementMatches[3];
+                                               $close . $elementMatches[3];
                                }
                        }
                        $literalBlob .= $element . "\000";
@@ -665,6 +682,7 @@ class LanguageConverter {
 
                $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
                $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
+               // phpcs:ignore Generic.Files.LineLength
                $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
                while ( $startPos < $length && $continue ) {
                        $continue = preg_match(
@@ -949,11 +967,11 @@ class LanguageConverter {
         * Parse the conversion table stored in the cache.
         *
         * The tables should be in blocks of the following form:
-        *              -{
-        *                      word => word ;
-        *                      word => word ;
-        *                      ...
-        *              }-
+        *              -{
+        *                      word => word ;
+        *                      word => word ;
+        *                      ...
+        *              }-
         *
         * To make the tables more manageable, subpages are allowed
         * and will be parsed recursively if $recursive == true.