Fixes Bug 31865 - Tag <dws> for discarding whitespaces.
author Mark A. Hershberger <mah@users.mediawiki.org>
Mon, 21 Nov 2011 01:45:23 +0000 (01:45 +0000)
committer Mark A. Hershberger <mah@users.mediawiki.org>
Mon, 21 Nov 2011 01:45:23 +0000 (01:45 +0000)
Patch with parser tests from Van de Bugger

includes/parser/Preprocessor_DOM.php
includes/parser/Preprocessor_Hash.php
tests/parser/parserTests.txt

index 066589f..7803a70 100644 (file)
@@ -211,6 +211,9 @@ class Preprocessor_DOM implements Preprocessor {
                        $ignoredElements = array( 'includeonly' );
                        $xmlishElements[] = 'includeonly';
                }
+               // `dws' stands for "discard white spaces". `<dws>' and all the whitespaces after it are
+               // discarded.
+               $xmlishElements[] = 'dws';
                $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 
                // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
@@ -406,6 +409,20 @@ class Preprocessor_DOM implements Preprocessor {
                                }
 
                                $tagStartPos = $i;
+
+                               // Handle tag `dws'.
+                               if ( $name == 'dws' ) {
+                                       $i = $tagEndPos + 1;
+                                       if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) {
+                                               $i += strlen( $matches[0] );
+                                       }
+                                       $accum .=
+                                               '<ignore>' .
+                                                       htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) ) .
+                                               '</ignore>';
+                                       continue;
+                               }
+
                                if ( $text[$tagEndPos-1] == '/' ) {
                                        $attrEnd = $tagEndPos - 1;
                                        $inner = null;
index 2934181..ad5155b 100644 (file)
@@ -153,6 +153,9 @@ class Preprocessor_Hash implements Preprocessor {
                        $ignoredElements = array( 'includeonly' );
                        $xmlishElements[] = 'includeonly';
                }
+               // `dws' stands for "discard white spaces". `<dws>' and all the whitespaces after it are
+               // discarded.
+               $xmlishElements[] = 'dws';
                $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
 
                // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
@@ -350,6 +353,17 @@ class Preprocessor_Hash implements Preprocessor {
                                }
 
                                $tagStartPos = $i;
+
+                               // Handle tag `dws'.
+                               if ( $name == 'dws' ) {
+                                       $i = $tagEndPos + 1;
+                                       if ( preg_match( '/\s*/', $text, $matches, 0, $i ) ) {
+                                               $i += strlen( $matches[0] );
+                                       }
+                                       $accum->addNodeWithText( 'ignore', substr( $text, $tagStartPos, $i - $tagStartPos ) );
+                                       continue;
+                               }
+
                                if ( $text[$tagEndPos-1] == '/' ) {
                                        // Short end tag
                                        $attrEnd = $tagEndPos - 1;
index 9a7fd40..c331dd3 100644 (file)
@@ -8930,6 +8930,97 @@ title=[[MediaWiki:bug32450.css]]
 </p>
 !! end
 
+!! test
+Bug 31865: HTML-style tag <dws> is recognized and discarded.
+!! input
+one<dws>two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: XML-style tag <dws/> is recognized and discarded.
+!! input
+one<dws/>two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: Spaces after <dws> tag are discarded.
+!! input
+one<dws>   two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: Tabs after <dws> tag are discarded too.
+!! input
+one<dws>                       two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: Newlines after <dws> tag are discarded too.
+!! input
+one<dws>
+
+
+two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: Spaces before <dws> tag are not discarded.
+!! input
+one   <dws>two
+!! result
+<p>one   two
+</p>
+!! end
+
+!! test
+Bug 31865: <dws> Continuation is indented.
+!! input
+one<dws>
+       two
+!! result
+<p>onetwo
+</p>
+!! end
+
+!! test
+Bug 31865: <dws> List item continuation.
+!! input
+* one<dws>
+       two
+* three
+!! result
+<ul><li> onetwo
+</li><li> three
+</li></ul>
+
+!! end
+
+!! test
+Bug 31865: <dws/> XML-style; asterisk after the tag does not start list item. 
+!! input
+* one <dws/>
+* two
+!! result
+<ul><li> one * two
+</li></ul>
+
+!! end
+
 TODO:
 more images
 more tables