X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=8e98abd618e4ffce26201919b1f0e7c38813f99c;hp=6d0368c7a15d63e167475865952023f9a48b1cb8;hb=f600b4ede9a6390cc5e929d2a48ed98af2edbc60;hpb=e0f53bfb8dd461f52dbfe1339323704e7cac55fe diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 6d0368c7a1..8e98abd618 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -20,6 +20,8 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -353,24 +355,33 @@ class LanguageConverter { } /* we convert everything except: - * 1. HTML markups (anything between < and >) - * 2. HTML entities - * 3. placeholders created by the parser - */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + 1. HTML markups (anything between < and >) + 2. HTML entities + 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. + */ + $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. Thus try and possessively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between tags - $codefix = '.+?<\/code>|'; + $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of