X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=a84c4b82814fe0651a196dcf7ca5585af7549f50;hb=e1aabf2f24aef20adc72db8a750704cbb33236c6;hp=67c0ca7412a51a1592c15989e5ea081bb916f48f;hpb=a8379682a46a428320c88702c800a6107c015137;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 67c0ca7412..a84c4b8281 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -20,6 +20,8 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -37,6 +39,7 @@ class LanguageConverter { */ static public $languagesWithVariants = [ 'en', + 'crh', 'gan', 'iu', 'kk', @@ -351,26 +354,34 @@ class LanguageConverter { if ( $this->guessVariant( $text, $toVariant ) ) { return $text; } - /* we convert everything except: - * 1. HTML markups (anything between < and >) - * 2. HTML entities - * 3. placeholders created by the parser - */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + 1. HTML markups (anything between < and >) + 2. HTML entities + 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. + */ + $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possessively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between tags - $codefix = '.+?<\/code>|'; + $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of