Protect language converter markup in the preprocessor.
authorC. Scott Ananian <cscott@cscott.net>
Tue, 20 Sep 2016 22:26:32 +0000 (18:26 -0400)
committerTim Starling <tstarling@wikimedia.org>
Thu, 15 Dec 2016 23:50:44 +0000 (23:50 +0000)
This ensures that `{{echo|-{R|foo}-}}` is parsed correctly as
a template invocation with a single argument, not as two separate
arguments split by the `|`.

Bug: T146304
Change-Id: I709d007c70a3fd19264790055042c615999b2f67

includes/parser/Preprocessor.php
includes/parser/Preprocessor_DOM.php
includes/parser/Preprocessor_Hash.php
tests/parser/parserTests.txt

index cc98abd..426b550 100644 (file)
@@ -48,7 +48,13 @@ abstract class Preprocessor {
                        'names' => [ 2 => null ],
                        'min' => 2,
                        'max' => 2,
-               ]
+               ],
+               '-{' => [
+                       'end' => '}-',
+                       'names' => [ 1 => null ],
+                       'min' => 1,
+                       'max' => 1,
+               ],
        ];
 
        /**
index 5da7cd7..950d66d 100644 (file)
@@ -193,6 +193,8 @@ class Preprocessor_DOM extends Preprocessor {
         * @return string
         */
        public function preprocessToXml( $text, $flags = 0 ) {
+               global $wgDisableLangConversion;
+
                $forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
 
                $xmlishElements = $this->parser->getStripList();
@@ -220,6 +222,10 @@ class Preprocessor_DOM extends Preprocessor {
                $stack = new PPDStack;
 
                $searchBase = "[{<\n"; # }
+               if ( !$wgDisableLangConversion ) {
+                       $searchBase .= '-';
+               }
+
                // For fast reverse searches
                $revText = strrev( $text );
                $lengthText = strlen( $text );
@@ -298,7 +304,10 @@ class Preprocessor_DOM extends Preprocessor {
                                                break;
                                        }
                                } else {
-                                       $curChar = $text[$i];
+                                       $curChar = $curTwoChar = $text[$i];
+                                       if ( ( $i + 1 ) < $lengthText ) {
+                                               $curTwoChar .= $text[$i + 1];
+                                       }
                                        if ( $curChar == '|' ) {
                                                $found = 'pipe';
                                        } elseif ( $curChar == '=' ) {
@@ -311,11 +320,20 @@ class Preprocessor_DOM extends Preprocessor {
                                                } else {
                                                        $found = 'line-start';
                                                }
+                                       } elseif ( $curTwoChar == $currentClosing ) {
+                                               $found = 'close';
+                                               $curChar = $curTwoChar;
                                        } elseif ( $curChar == $currentClosing ) {
                                                $found = 'close';
+                                       } elseif ( isset( $this->rules[$curTwoChar] ) ) {
+                                               $curChar = $curTwoChar;
+                                               $found = 'open';
+                                               $rule = $this->rules[$curChar];
                                        } elseif ( isset( $this->rules[$curChar] ) ) {
                                                $found = 'open';
                                                $rule = $this->rules[$curChar];
+                                       } elseif ( $curChar == '-' ) {
+                                               $found = 'dash';
                                        } else {
                                                # Some versions of PHP have a strcspn which stops on null characters
                                                # Ignore and continue
@@ -595,7 +613,8 @@ class Preprocessor_DOM extends Preprocessor {
                                // input pointer.
                        } elseif ( $found == 'open' ) {
                                # count opening brace characters
-                               $count = strspn( $text, $curChar, $i );
+                               $curLen = strlen( $curChar );
+                               $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i );
 
                                # we need to add to stack only if opening brace count is enough for one of the rules
                                if ( $count >= $rule['min'] ) {
@@ -615,12 +634,13 @@ class Preprocessor_DOM extends Preprocessor {
                                        # Add literal brace(s)
                                        $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
                                }
-                               $i += $count;
+                               $i += $curLen * $count;
                        } elseif ( $found == 'close' ) {
                                $piece = $stack->top;
                                # lets check if there are enough characters for closing brace
                                $maxCount = $piece->count;
-                               $count = strspn( $text, $curChar, $i, $maxCount );
+                               $curLen = strlen( $curChar );
+                               $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount );
 
                                # check for maximum matching characters (if there are 5 closing
                                # characters, we will probably need only 3 - depending on the rules)
@@ -643,7 +663,7 @@ class Preprocessor_DOM extends Preprocessor {
                                        # No matching element found in callback array
                                        # Output a literal closing brace and continue
                                        $accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
-                                       $i += $count;
+                                       $i += $curLen * $count;
                                        continue;
                                }
                                $name = $rule['names'][$matchingCount];
@@ -682,7 +702,7 @@ class Preprocessor_DOM extends Preprocessor {
                                }
 
                                # Advance input pointer
-                               $i += $matchingCount;
+                               $i += $curLen * $matchingCount;
 
                                # Unwind the stack
                                $stack->pop();
@@ -716,6 +736,9 @@ class Preprocessor_DOM extends Preprocessor {
                                $stack->getCurrentPart()->eqpos = strlen( $accum );
                                $accum .= '=';
                                ++$i;
+                       } elseif ( $found == 'dash' ) {
+                               $accum .= '-';
+                               ++$i;
                        }
                }
 
index 8a4637e..1317e60 100644 (file)
@@ -117,6 +117,8 @@ class Preprocessor_Hash extends Preprocessor {
         * @return PPNode_Hash_Tree
         */
        public function preprocessToObj( $text, $flags = 0 ) {
+               global $wgDisableLangConversion;
+
                $tree = $this->cacheGetTree( $text, $flags );
                if ( $tree !== false ) {
                        $store = json_decode( $tree );
@@ -152,6 +154,10 @@ class Preprocessor_Hash extends Preprocessor {
                $stack = new PPDStack_Hash;
 
                $searchBase = "[{<\n";
+               if ( !$wgDisableLangConversion ) {
+                       $searchBase .= '-';
+               }
+
                // For fast reverse searches
                $revText = strrev( $text );
                $lengthText = strlen( $text );
@@ -229,7 +235,10 @@ class Preprocessor_Hash extends Preprocessor {
                                                break;
                                        }
                                } else {
-                                       $curChar = $text[$i];
+                                       $curChar = $curTwoChar = $text[$i];
+                                       if ( ( $i + 1 ) < $lengthText ) {
+                                               $curTwoChar .= $text[$i + 1];
+                                       }
                                        if ( $curChar == '|' ) {
                                                $found = 'pipe';
                                        } elseif ( $curChar == '=' ) {
@@ -242,11 +251,20 @@ class Preprocessor_Hash extends Preprocessor {
                                                } else {
                                                        $found = 'line-start';
                                                }
+                                       } elseif ( $curTwoChar == $currentClosing ) {
+                                               $found = 'close';
+                                               $curChar = $curTwoChar;
                                        } elseif ( $curChar == $currentClosing ) {
                                                $found = 'close';
+                                       } elseif ( isset( $this->rules[$curTwoChar] ) ) {
+                                               $curChar = $curTwoChar;
+                                               $found = 'open';
+                                               $rule = $this->rules[$curChar];
                                        } elseif ( isset( $this->rules[$curChar] ) ) {
                                                $found = 'open';
                                                $rule = $this->rules[$curChar];
+                                       } elseif ( $curChar == '-' ) {
+                                               $found = 'dash';
                                        } else {
                                                # Some versions of PHP have a strcspn which stops on null characters
                                                # Ignore and continue
@@ -538,7 +556,8 @@ class Preprocessor_Hash extends Preprocessor {
                                // input pointer.
                        } elseif ( $found == 'open' ) {
                                # count opening brace characters
-                               $count = strspn( $text, $curChar, $i );
+                               $curLen = strlen( $curChar );
+                               $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i );
 
                                # we need to add to stack only if opening brace count is enough for one of the rules
                                if ( $count >= $rule['min'] ) {
@@ -557,12 +576,13 @@ class Preprocessor_Hash extends Preprocessor {
                                        # Add literal brace(s)
                                        self::addLiteral( $accum, str_repeat( $curChar, $count ) );
                                }
-                               $i += $count;
+                               $i += $curLen * $count;
                        } elseif ( $found == 'close' ) {
                                $piece = $stack->top;
                                # lets check if there are enough characters for closing brace
                                $maxCount = $piece->count;
-                               $count = strspn( $text, $curChar, $i, $maxCount );
+                               $curLen = strlen( $curChar );
+                               $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount );
 
                                # check for maximum matching characters (if there are 5 closing
                                # characters, we will probably need only 3 - depending on the rules)
@@ -585,7 +605,7 @@ class Preprocessor_Hash extends Preprocessor {
                                        # No matching element found in callback array
                                        # Output a literal closing brace and continue
                                        self::addLiteral( $accum, str_repeat( $curChar, $count ) );
-                                       $i += $count;
+                                       $i += $curLen * $count;
                                        continue;
                                }
                                $name = $rule['names'][$matchingCount];
@@ -627,7 +647,7 @@ class Preprocessor_Hash extends Preprocessor {
                                }
 
                                # Advance input pointer
-                               $i += $matchingCount;
+                               $i += $curLen * $matchingCount;
 
                                # Unwind the stack
                                $stack->pop();
@@ -661,6 +681,9 @@ class Preprocessor_Hash extends Preprocessor {
                                $accum[] = [ 'equals', [ '=' ] ];
                                $stack->getCurrentPart()->eqpos = count( $accum ) - 1;
                                ++$i;
+                       } elseif ( $found == 'dash' ) {
+                               self::addLiteral( $accum, '-' );
+                               ++$i;
                        }
                }
 
index 07d50a8..b18f1e7 100644 (file)
@@ -20595,6 +20595,17 @@ language=sr variant=sr-ec
 </p>
 !! end
 
+!! test
+T146304: Don't break template parsing if language converter markup is in the parameter.
+!! options
+language=sr variant=sr-ec
+!! wikitext
+{{echo|-{R|foo}-}}
+!! html/php
+<p>foo
+</p>
+!! end
+
 # FIXME: This test is currently broken in the PHP parser (bug 52661)
 !! test
 Don't break image parsing if language converter markup is in the caption.