*/
static public $languagesWithVariants = [
'en',
+ 'crh',
'gan',
'iu',
'kk',
$req = $this->getURLVariant();
+ Hooks::run( 'GetLangPreferredVariant', [ &$req ] );
+
if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
$req = $this->getUserVariant();
} elseif ( !$req ) {
if ( $this->guessVariant( $text, $toVariant ) ) {
return $text;
}
-
/* we convert everything except:
1. HTML markups (anything between < and >)
2. HTML entities
$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
// disable conversion of <pre> tags
$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+ // The "|.*+)" alternative at the end is a fallback in case we missed some
+ // part of HTML syntax: we fail securely (hopefully) by matching the rest of the string.
+ $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
- $reg = '/' . $codefix . $scriptfix . $prefix .
- '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+ $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+ '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
$startPos = 0;
$sourceBlob = '';
$literalBlob = '';
// Guard against delimiter nulls in the input
// (should never happen: see T159174)
$text = str_replace( "\000", '', $text );
+ $text = str_replace( "\004", '', $text );
$markupMatches = null;
$elementMatches = null;
// We hit the end.
$elementPos = strlen( $text );
$element = '';
+ } elseif ( substr( $element, -1 ) === "\004" ) {
+ // This can sometimes happen if we have
+ // unclosed html tags (for example,
+ // when converting a title attribute
+ // during a recursive call that contains
+ // a &lt;, e.g. <div title="&lt;">).
+ $element = substr( $element, 0, -1 );
}
} else {
// If we hit here, then Language Converter could be tricked
$log = LoggerFactory::getInstance( 'languageconverter' );
$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
. ". Disabling language conversion for this page.",
- array(
+ [
"method" => __METHOD__,
"variant" => $toVariant,
"startOfText" => substr( $text, 0, 500 )
- )
+ ]
);
return $text;
}
if ( $element !== ''
&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
) {
+ // FIXME: this decodes entities, so if you have something
+ // like <div title="foo&lt;bar">, the bar won't get
+ // translated, since after entity decoding it looks like
+ // unclosed html and we call this method recursively
+ // on attributes.
$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
+ // Ensure self-closing tags stay self-closing.
+ $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
$changed = false;
foreach ( [ 'title', 'alt' ] as $attrName ) {
if ( !isset( $attrs[$attrName] ) ) {
}
if ( $changed ) {
$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
- $elementMatches[3];
+ $close . $elementMatches[3];
}
}
$literalBlob .= $element . "\000";
$out = '';
$length = strlen( $text );
$shouldConvert = !$this->guessVariant( $text, $variant );
-
- while ( $startPos < $length ) {
- $pos = strpos( $text, '-{', $startPos );
-
- if ( $pos === false ) {
+ $continue = 1;
+
+ $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
+ $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
+ // phpcs:ignore Generic.Files.LineLength
+ $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
+ while ( $startPos < $length && $continue ) {
+ $continue = preg_match(
+ // Only match -{ outside of html.
+ "/$noScript|$noStyle|$noHtml|-\{/",
+ $text,
+ $m,
+ PREG_OFFSET_CAPTURE,
+ $startPos
+ );
+
+ if ( !$continue ) {
// No more markup, append final segment
$fragment = substr( $text, $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
return $out;
}
- // Markup found
+ // Offset of the match of the regex pattern.
+ $pos = $m[0][1];
+
// Append initial segment
$fragment = substr( $text, $startPos, $pos - $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
-
- // Advance position
+ // -{ marker found, not in attribute
+ // Advance position up to -{ marker.
$startPos = $pos;
-
// Do recursive conversion
+ // Note: This passes $startPos by reference, and advances it.
$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
}
-
return $out;
}