From: Brian Wolff Date: Sun, 24 Jan 2016 10:29:10 +0000 (-0500) Subject: SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit X-Git-Tag: 1.31.0-rc.0~1507 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=fbe78cfa094645b907d0fd2885c5797321f794eb;ds=sidebyside SECURITY: XSS in langconverter when regex hits pcre.backtrack_limit Adjust regexes for what not to convert to avoid backtracking by preferring possessive quantifiers. Add check that we really have matched to the end of the string, and log error if the regex hits some sort of error preventing the entire string from being matched. Should the regex not match to the end, then language conversion is disabled for the string. Bug: T124404 Change-Id: I4f0c171c7da804e9c1508ef1f59556665a318f6a --- diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 67c0ca7412..00bc02db45 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -20,6 +20,8 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Logger\LoggerFactory; + /** * Base class for language conversion. * @ingroup Language @@ -353,24 +355,30 @@ class LanguageConverter { } /* we convert everything except: - * 1. HTML markups (anything between < and >) - * 2. HTML entities - * 3. placeholders created by the parser - */ - $marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+'; + 1. HTML markups (anything between < and >) + 2. HTML entities + 3. placeholders created by the parser + IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). + Minimize use of backtracking where possible. + */ + $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>]+$|^[^<>]*>'; + $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; + + // Optimize for the common case where these tags have + // few or no children. 
Thus try and possessively get as much as + // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between <code> tags - $codefix = '<code>.+?<\/code>|'; + $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of