SECURITY: Handle -{}- syntax in attributes safely

author Brian Wolff <bawolff+wn@gmail.com>

Thu, 11 Feb 2016 22:08:03 +0000 (17:08 -0500)

committer Reedy <reedy@wikimedia.org>

Wed, 15 Nov 2017 03:33:03 +0000 (03:33 +0000)
author Brian Wolff <bawolff+wn@gmail.com>
Thu, 11 Feb 2016 22:08:03 +0000 (17:08 -0500)
committer Reedy <reedy@wikimedia.org>
Wed, 15 Nov 2017 03:33:03 +0000 (03:33 +0000)
diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php

index 00bc02d..f9610fa 100644 (file)
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -376,9 +376,12 @@ class LanguageConverter {
                 $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
                 // disable conversion of <pre> tags
                 $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
                 $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
                 // disable conversion of <pre> tags
                 $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
+               // The "|.*+)" alternative at the end is a fallback in case we missed some part
+               // of HTML syntax: we then fail securely (hopefully) by matching the rest of the string.
+               $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
  
  
-               $reg = '/' . $codefix . $scriptfix . $prefix .
-                       '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
+               $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
+                       '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
                 $startPos = 0;
                 $sourceBlob = '';
                 $literalBlob = '';
                 $startPos = 0;
                 $sourceBlob = '';
                 $literalBlob = '';
@@ -658,29 +661,41 @@ class LanguageConverter {
                 $out = '';
                 $length = strlen( $text );
                 $shouldConvert = !$this->guessVariant( $text, $variant );
                 $out = '';
                 $length = strlen( $text );
                 $shouldConvert = !$this->guessVariant( $text, $variant );
-
-               while ( $startPos < $length ) {
-                       $pos = strpos( $text, '-{', $startPos );
-
-                       if ( $pos === false ) {
+               $continue = 1;
+
+               $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
+               $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
+               $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
+               while ( $startPos < $length && $continue ) {
+                       $continue = preg_match(
+                               // Only match -{ outside of html.
+                               "/$noScript|$noStyle|$noHtml|-\{/",
+                               $text,
+                               $m,
+                               PREG_OFFSET_CAPTURE,
+                               $startPos
+                       );
+
+                       if ( !$continue ) {
                                 // No more markup, append final segment
                                 $fragment = substr( $text, $startPos );
                                 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                                 return $out;
                         }
  
                                 // No more markup, append final segment
                                 $fragment = substr( $text, $startPos );
                                 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                                 return $out;
                         }
  
-                       // Markup found
+                       // Offset of the match of the regex pattern.
+                       $pos = $m[0][1];
+
                         // Append initial segment
                         $fragment = substr( $text, $startPos, $pos - $startPos );
                         $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
                         // Append initial segment
                         $fragment = substr( $text, $startPos, $pos - $startPos );
                         $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
-
-                       // Advance position
+                       // -{ marker found, not in attribute
+                       // Advance position up to -{ marker.
                         $startPos = $pos;
                         $startPos = $pos;
-
                         // Do recursive conversion
                         // Do recursive conversion
+                       // Note: This passes $startPos by reference, and advances it.
                         $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                 }
                         $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
                 }
-
                 return $out;
         }
  
                 return $out;
         }
  
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt

index ff574d1..fb549f5 100644 (file)
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -18505,6 +18505,20 @@ all additional text is vanished
  <p>all additional text is vanished</p>
  !! end
  
  <p>all additional text is vanished</p>
  !! end
  
+!! test
+Language converter glossary rules inside attributes (T119158)
+!! options
+language=sr variant=sr-el
+!! wikitext
+-{H|abc=>sr-el:" onload="alert(1)" data-foo="}-
+
+[[File:Foobar.jpg|alt=-{}-abc-{}-]]
+!! html
+<p>
+</p><p><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="&quot; onload=&quot;alert(1)&quot; data-foo=&quot;" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220"></a>
+</p>
+!! end
+
  !! test
  Self closed html pairs (T7487)
  !! wikitext
  !! test
  Self closed html pairs (T7487)
  !! wikitext
author	Brian Wolff <bawolff+wn@gmail.com>
	Thu, 11 Feb 2016 22:08:03 +0000 (17:08 -0500)
committer	Reedy <reedy@wikimedia.org>
	Wed, 15 Nov 2017 03:33:03 +0000 (03:33 +0000)
languages/LanguageConverter.php		patch | blob | history
tests/parser/parserTests.txt		patch | blob | history