Previously, if one had an attribute with the contents
"-{}-foo-{}-", foo would get replaced by language converter as if
it wasn't in an attribute. This lead to an XSS attack.
This breaks doing manual conversions in url href's (or any
other attribute that goes through an escaping method
other than Sanitizer's). e.g. http://{sr-el:foo';sr-ec:bar}.com
won't work anymore. See also T87332
Bug: T119158
Change-Id: Idbc45cac12c309b0ccb4adeff6474fa527b48edb
$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
// disable conversion of <pre> tags
$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
// disable conversion of <pre> tags
$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+ // The "|.*+)" at the end, is in case we missed some part of html syntax,
+ // we will fail securely (hopefully) by matching the rest of the string.
+ $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
- $reg = '/' . $codefix . $scriptfix . $prefix .
- '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+ $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+ '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
$startPos = 0;
$sourceBlob = '';
$literalBlob = '';
$startPos = 0;
$sourceBlob = '';
$literalBlob = '';
$out = '';
$length = strlen( $text );
$shouldConvert = !$this->guessVariant( $text, $variant );
$out = '';
$length = strlen( $text );
$shouldConvert = !$this->guessVariant( $text, $variant );
-
- while ( $startPos < $length ) {
- $pos = strpos( $text, '-{', $startPos );
-
- if ( $pos === false ) {
+ $continue = 1;
+
+ $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
+ $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
+ $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
+ while ( $startPos < $length && $continue ) {
+ $continue = preg_match(
+ // Only match -{ outside of html.
+ "/$noScript|$noStyle|$noHtml|-\{/",
+ $text,
+ $m,
+ PREG_OFFSET_CAPTURE,
+ $startPos
+ );
+
+ if ( !$continue ) {
// No more markup, append final segment
$fragment = substr( $text, $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
return $out;
}
// No more markup, append final segment
$fragment = substr( $text, $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
return $out;
}
+ // Offset of the match of the regex pattern.
+ $pos = $m[0][1];
+
// Append initial segment
$fragment = substr( $text, $startPos, $pos - $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
// Append initial segment
$fragment = substr( $text, $startPos, $pos - $startPos );
$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
+ // -{ marker found, not in attribute
+ // Advance position up to -{ marker.
// Do recursive conversion
// Do recursive conversion
+ // Note: This passes $startPos by reference, and advances it.
$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
}
$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
}
<p>all additional text is vanished</p>
!! end
<p>all additional text is vanished</p>
!! end
+!! test
+Language converter glossary rules inside attributes (T119158)
+!! options
+language=sr variant=sr-el
+!! wikitext
+-{H|abc=>sr-el:" onload="alert(1)" data-foo="}-
+
+[[File:Foobar.jpg|alt=-{}-abc-{}-]]
+!! html
+<p>
+</p><p><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="" onload="alert(1)" data-foo="" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220"></a>
+</p>
+!! end
+
!! test
Self closed html pairs (T7487)
!! wikitext
!! test
Self closed html pairs (T7487)
!! wikitext