From: Brian Wolff Date: Thu, 11 Feb 2016 22:08:03 +0000 (-0500) Subject: SECURITY: Handle -{}- syntax in attributes safely X-Git-Tag: 1.31.0-rc.0~1506 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=f21f3942eb10d7e688eb25261ac3a9478268cbd3 SECURITY: Handle -{}- syntax in attributes safely Previously, if one had an attribute with the contents "-{}-foo-{}-", foo would get replaced by language converter as if it wasn't in an attribute. This lead to an XSS attack. This breaks doing manual conversions in url href's (or any other attribute that goes through an escaping method other than Sanitizer's). e.g. http://{sr-el:foo';sr-ec:bar}.com won't work anymore. See also T87332 Bug: T119158 Change-Id: Idbc45cac12c309b0ccb4adeff6474fa527b48edb --- diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 00bc02db45..f9610fa93c 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -376,9 +376,12 @@ class LanguageConverter { $scriptfix = ']*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|'; // disable conversion of
 tags
 		$prefix = ']*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+		// The "|.*+)" at the end, is in case we missed some part of html syntax,
+		// we will fail securely (hopefully) by matching the rest of the string.
+		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
 
-		$reg = '/' . $codefix . $scriptfix . $prefix .
-			'<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+		$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
 		$startPos = 0;
 		$sourceBlob = '';
 		$literalBlob = '';
@@ -658,29 +661,41 @@ class LanguageConverter {
 		$out = '';
 		$length = strlen( $text );
 		$shouldConvert = !$this->guessVariant( $text, $variant );
-
-		while ( $startPos < $length ) {
-			$pos = strpos( $text, '-{', $startPos );
-
-			if ( $pos === false ) {
+		$continue = 1;
+
+		$noScript = '.*?<\/script>(*SKIP)(*FAIL)';
+		$noStyle = '.*?<\/style>(*SKIP)(*FAIL)';
+		$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
+		while ( $startPos < $length && $continue ) {
+			$continue = preg_match(
+				// Only match -{ outside of html.
+				"/$noScript|$noStyle|$noHtml|-\{/",
+				$text,
+				$m,
+				PREG_OFFSET_CAPTURE,
+				$startPos
+			);
+
+			if ( !$continue ) {
 				// No more markup, append final segment
 				$fragment = substr( $text, $startPos );
 				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
 				return $out;
 			}
 
-			// Markup found
+			// Offset of the match of the regex pattern.
+			$pos = $m[0][1];
+
 			// Append initial segment
 			$fragment = substr( $text, $startPos, $pos - $startPos );
 			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
-
-			// Advance position
+			// -{ marker found, not in attribute
+			// Advance position up to -{ marker.
 			$startPos = $pos;
-
 			// Do recursive conversion
+			// Note: This passes $startPos by reference, and advances it.
 			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
 		}
-
 		return $out;
 	}
 
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index ff574d153c..fb549f5ae1 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -18505,6 +18505,20 @@ all additional text is vanished
 

all additional text is vanished

!! end +!! test +Language converter glossary rules inside attributes (T119158) +!! options +language=sr variant=sr-el +!! wikitext +-{H|abc=>sr-el:" onload="alert(1)" data-foo="}- + +[[File:Foobar.jpg|alt=-{}-abc-{}-]] +!! html +

+

" onload="alert(1)" data-foo=" +

+!! end + !! test Self closed html pairs (T7487) !! wikitext