From 5bd4f88169e2fc5a45224d1518f989974ddf6f5d Mon Sep 17 00:00:00 2001 From: "Mark A. Hershberger" Date: Mon, 21 Nov 2011 01:45:23 +0000 Subject: [PATCH] Fixes Bug 31865 - Tag for discarding whitespaces. Patch with parser tests from Van de Bugger --- includes/parser/Preprocessor_DOM.php | 17 +++++ includes/parser/Preprocessor_Hash.php | 14 +++++ tests/parser/parserTests.txt | 91 +++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php index 066589f685..7803a70ec0 100644 --- a/includes/parser/Preprocessor_DOM.php +++ b/includes/parser/Preprocessor_DOM.php @@ -211,6 +211,9 @@ class Preprocessor_DOM implements Preprocessor { $ignoredElements = array( 'includeonly' ); $xmlishElements[] = 'includeonly'; } + // `dws' stands for "discard white spaces". `<dws>' and all the whitespaces after it are + // discarded. + $xmlishElements[] = 'dws'; $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset @@ -406,6 +409,20 @@ class Preprocessor_DOM implements Preprocessor { } $tagStartPos = $i; + + // Handle tag `dws'. + if ( $name == 'dws' ) { + $i = $tagEndPos + 1; + if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) { + $i += strlen( $matches[0] ); + } + $accum .= + '<ignore>' . + htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) . + '</ignore>'; + continue; + } + if ( $text[$tagEndPos-1] == '/' ) { $attrEnd = $tagEndPos - 1; $inner = null; diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index 2934181a50..ad5155b8e2 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -153,6 +153,9 @@ class Preprocessor_Hash implements Preprocessor { $ignoredElements = array( 'includeonly' ); $xmlishElements[] = 'includeonly'; } + // `dws' stands for "discard white spaces". 
`<dws>' and all the whitespaces after it are + // discarded. + $xmlishElements[] = 'dws'; $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset @@ -350,6 +353,17 @@ class Preprocessor_Hash implements Preprocessor { } $tagStartPos = $i; + + // Handle tag dws. + if ( $name == 'dws' ) { + $i = $tagEndPos + 1; + if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) { + $i += strlen( $matches[0] ); + } + $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) ); + continue; + } + if ( $text[$tagEndPos-1] == '/' ) { // Short end tag $attrEnd = $tagEndPos - 1; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 9a7fd40dfb..c331dd31a2 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -8930,6 +8930,97 @@ title=[[MediaWiki:bug32450.css]]

!! end +!! test +Bug 31865: HTML-style tag is recognized and discarded. +!! input +one<dws>two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: XML-style tag is recognized and discarded. +!! input +one<dws/>two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: Spaces after tag are discarded. +!! input +one<dws> two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: Tabs after tag are discarded too. +!! input +one<dws> two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: Newlines after tag are discarded too. +!! input +one<dws> + + +two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: Spaces before tag are not discarded. +!! input +one <dws>two +!! result +<p>one two +</p>
+!! end + +!! test +Bug 31865: Continuation is indented. +!! input +one<dws> + two +!! result +<p>onetwo +</p>
+!! end + +!! test +Bug 31865: List item continuation. +!! input +* one<dws> + two +* three +!! result +<ul><li> onetwo +</li><li> three +</li></ul>
+ +!! end + +!! test +Bug 31865: XML-style; asterisk after the tag does not start list item. +!! input +* one <dws/> +* two +!! result +<ul><li> one * two +</li></ul>
+ +!! end + TODO: more images more tables -- 2.20.1