(bug 529, bug 12974) alter behaviour of the table- and template-parser:
authorHappy-melon <happy-melon@users.mediawiki.org>
Sun, 16 Jan 2011 23:57:13 +0000 (23:57 +0000)
committerHappy-melon <happy-melon@users.mediawiki.org>
Sun, 16 Jan 2011 23:57:13 +0000 (23:57 +0000)
* Remove the hack from bug 529 which inserts a newline when the template text begins with any block character; this breaks many things in many exciting ways.  I've left it in for now when the text begins with a wikitable, as that markup can't be mistaken for anything else.
* Instead, move the contents of a table cell onto a new line for parsing, so that linestart elements (including nested tables) will parse as normal.

This means that structures like

{|
| {{template-containing-wikilist}}
|}

Will still work, but for the right reason, and structures like

{|
| style="color:{{template-containing-hexcode}}" | Foo
| * Bar
| {|
   | Look at me, I'm nested!
  |}
|}

Will all now start to work.  Structures like

* Foo {{template-containing-wikilist}}

Will now not, but honestly, should they?

RELEASE-NOTES
includes/parser/Parser.php
tests/parser/parserTests.txt

index 9db072a..3b0e3be 100644 (file)
@@ -87,6 +87,11 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
   members.
 * (bug 2585) Image pages should send 404 if no image, no shared image and no
   description page.
+* (bug 12974) The behaviour of wikitable and template parsing was altered.  Previously,
+  all templates were treated as being at the start of a block context for the purposes
+  of evaluating wikitext elements like #, :, *, and tables (bug 529).  Now this 
+  behaviour is only applied to wikitable-start {|, but the first line of a wikitable
+  cell is now treated as a linestart.
 
 === API changes in 1.18 ===
 * (bug 26339) Throw warning when truncating an overlarge API result
index 7eadf39..2ea87e1 100644 (file)
@@ -825,7 +825,22 @@ class Parser {
                $has_opened_tr = array(); # Did this table open a <tr> element?
                $indent_level = 0; # indent level of the table
 
-               foreach ( $lines as $outLine ) {
+               # Keep pulling lines off the front of the array until they're all gone.
+               # we want to be able to push lines back on to the front of the stream,
+               # but StringUtils::explode() returns funky optimised Iterators which don't
+               # support insertion.  So maintain a separate buffer and draw on that first if
+               # there's anything in it
+               $extraLines = array();
+               $lines->rewind();
+               do {
+                       if( $extraLines ){
+                               $outLine = array_shift( $extraLines );
+                       } elseif( $lines->valid() ) {
+                               $outLine = $lines->current();
+                               $lines->next();
+                       } else {
+                               break;
+                       }
                        $line = trim( $outLine );
 
                        if ( $line === '' ) { # empty line, go to next line
@@ -901,11 +916,10 @@ class Parser {
                        } elseif ( $first_character === '|' || $first_character === '!' || substr( $line , 0 , 2 )  === '|+' ) {
                                # This might be cell elements, td, th or captions
                                if ( substr( $line , 0 , 2 ) === '|+' ) {
-                                       $first_character = '+';
-                                       $line = substr( $line , 1 );
+                                       $first_character = '|+';
                                }
 
-                               $line = substr( $line , 1 );
+                               $line = substr( $line , strlen( $first_character ) );
 
                                if ( $first_character === '!' ) {
                                        $line = str_replace( '!!' , '||' , $line );
@@ -916,62 +930,84 @@ class Parser {
                                # by earlier parser steps, but should avoid splitting up eg
                                # attribute values containing literal "||".
                                $cells = StringUtils::explodeMarkup( '||' , $line );
-
-                               $outLine = '';
-
-                               # Loop through each table cell
-                               foreach ( $cells as $cell ) {
-                                       $previous = '';
-                                       if ( $first_character !== '+' ) {
-                                               $tr_after = array_pop( $tr_attributes );
-                                               if ( !array_pop( $tr_history ) ) {
-                                                       $previous = "<tr{$tr_after}>\n";
-                                               }
-                                               array_push( $tr_history , true );
-                                               array_push( $tr_attributes , '' );
-                                               array_pop( $has_opened_tr );
-                                               array_push( $has_opened_tr , true );
+                               $cell = array_shift( $cells );
+
+                               # Inject cells back into the stream to be dealt with later
+                               # TODO: really we should do the whole thing as a stream...
+                               # but that would be too much like a sensible implementation :P
+                               if( count( $cells ) ){
+                                       foreach( array_reverse( $cells ) as $extraCell ){
+                                               array_unshift( $extraLines, $first_character . $extraCell );
                                        }
+                               }
 
-                                       $last_tag = array_pop( $last_tag_history );
+                               $outLine = '';
 
-                                       if ( array_pop( $td_history ) ) {
-                                               $previous = "</{$last_tag}>\n{$previous}";
+                               $previous = '';
+                               if ( $first_character !== '|+' ) {
+                                       $tr_after = array_pop( $tr_attributes );
+                                       if ( !array_pop( $tr_history ) ) {
+                                               $previous = "<tr{$tr_after}>\n";
                                        }
+                                       array_push( $tr_history , true );
+                                       array_push( $tr_attributes , '' );
+                                       array_pop( $has_opened_tr );
+                                       array_push( $has_opened_tr , true );
+                               }
 
-                                       if ( $first_character === '|' ) {
-                                               $last_tag = 'td';
-                                       } elseif ( $first_character === '!' ) {
-                                               $last_tag = 'th';
-                                       } elseif ( $first_character === '+' ) {
-                                               $last_tag = 'caption';
-                                       } else {
-                                               $last_tag = '';
-                                       }
+                               $last_tag = array_pop( $last_tag_history );
 
-                                       array_push( $last_tag_history , $last_tag );
+                               if ( array_pop( $td_history ) ) {
+                                       $previous = "</{$last_tag}>\n{$previous}";
+                               }
 
-                                       # A cell could contain both parameters and data
-                                       $cell_data = explode( '|' , $cell , 2 );
+                               if ( $first_character === '|' ) {
+                                       $last_tag = 'td';
+                               } elseif ( $first_character === '!' ) {
+                                       $last_tag = 'th';
+                               } elseif ( $first_character === '|+' ) {
+                                       $last_tag = 'caption';
+                               } else {
+                                       $last_tag = '';
+                               }
 
-                                       # Bug 553: Note that a '|' inside an invalid link should not
-                                       # be mistaken as delimiting cell parameters
-                                       if ( strpos( $cell_data[0], '[[' ) !== false ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell}";
-                                       } elseif ( count( $cell_data ) == 1 ) {
-                                               $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
-                                       } else {
-                                               $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
-                                               $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
-                                               $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
-                                       }
+                               array_push( $last_tag_history , $last_tag );
+
+                               # A cell could contain both parameters and data... but the pipe could
+                               # also be the start of a nested table, or a raw pipe inside an invalid
+                               # link (bug 553).  
+                               $cell_data = preg_split( '/(?<!\{)\|/', $cell, 2 );
+
+                               # Bug 553: a '|' inside an invalid link should not
+                               # be mistaken as delimiting cell parameters
+                               if ( strpos( $cell_data[0], '[[' ) !== false ) {
+                                       $data = $cell;
+                                       $cell = "{$previous}<{$last_tag}>";
+                               } elseif ( count( $cell_data ) == 1 ) {
+                                       $cell = "{$previous}<{$last_tag}>";
+                                       $data = $cell_data[0];
+                               } else {
+                                       $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
+                                       $attributes = Sanitizer::fixTagAttributes( $attributes , $last_tag );
+                                       $cell = "{$previous}<{$last_tag}{$attributes}>";
+                                       $data = $cell_data[1];
+                               }
 
-                                       $outLine .= $cell;
-                                       array_push( $td_history , true );
+                               # Bug 529: the start of a table cell should be a linestart context for
+                               # processing other block markup, including nested tables.  The original
+                               # implementation of this was to add a newline before every brace construct,
+                               # which broke all manner of other things.  Instead, push the contents
+                               # of the cell back into the stream and come back to it later.  But don't
+                               # do that if the first line is empty, or you may get extra whitespace
+                               if( $data ){
+                                       array_unshift( $extraLines, trim( $data ) );
                                }
+
+                               $outLine .= $cell;
+                               array_push( $td_history , true );
                        }
                        $out .= $outLine . "\n";
-               }
+               } while( $lines->valid() || count( $extraLines ) );
 
                # Closing open td, tr && table
                while ( count( $td_history ) > 0 ) {
@@ -2235,6 +2271,7 @@ class Parser {
                                        '/(?:<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
                                        '<td|<th|<\\/?div|<hr|<\\/pre|<\\/p|'.$this->mUniqPrefix.'-pre|<\\/li|<\\/ul|<\\/ol|<\\/?center)/iS', $t );
                                if ( $openmatch or $closematch ) {
+
                                        $paragraphStack = false;
                                        # TODO bug 5718: paragraph closed
                                        $output .= $this->closeParagraph();
@@ -3225,11 +3262,11 @@ class Parser {
                        $text = wfEscapeWikiText( $text );
                } elseif ( is_string( $text )
                        && !$piece['lineStart']
-                       && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) )
+                       && preg_match( '/^{\\|/', $text ) )
                {
-                       # Bug 529: if the template begins with a table or block-level
-                       # element, it should be treated as beginning a new line.
-                       # This behaviour is somewhat controversial.
+                       # Bug 529: if the template begins with a table, it should be treated as
+                       # beginning a new line.  This previously handled other block-level elements
+                       # such as #, :, etc, but these have many false-positives (bug 12974).
                        $text = "\n" . $text;
                }
 
@@ -3288,7 +3325,7 @@ class Parser {
 
                if ( !$title->equals( $cacheTitle ) ) {
                        $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
-                               array( $title->getNamespace(), $cdb = $title->getDBkey() );
+                               array( $title->getNamespace(), $title->getDBkey() );
                }
 
                return array( $dom, $title );
index c48be92..5736d8c 100644 (file)
@@ -1210,7 +1210,8 @@ A table with nothing but a caption
 |}
 !! result
 <table>
-<caption> caption
+<caption>
+caption
 </caption><tr><td></td></tr></table>
 
 !! end
@@ -1226,12 +1227,22 @@ Simple table
 !! result
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
+<td>
+<p>1
+</p>
+</td>
+<td>
+<p>2
+</p>
 </td></tr>
 <tr>
-<td> 3 </td>
-<td> 4
+<td>
+<p>3
+</p>
+</td>
+<td>
+<p>4
+</p>
 </td></tr></table>
 
 !! end
@@ -1261,48 +1272,110 @@ Multiplication table
 |}
 !! result
 <table border="1" cellpadding="2">
-<caption>Multiplication table
+<caption>
+Multiplication table
 </caption>
 <tr>
-<th> &times; </th>
-<th> 1 </th>
-<th> 2 </th>
-<th> 3
+<th>
+<p>&times;
+</p>
+</th>
+<th>
+<p>1
+</p>
+</th>
+<th>
+<p>2
+</p>
+</th>
+<th>
+<p>3
+</p>
 </th></tr>
 <tr>
-<th> 1
+<th>
+<p>1
+</p>
 </th>
-<td> 1 </td>
-<td> 2 </td>
-<td> 3
+<td>
+<p>1
+</p>
+</td>
+<td>
+<p>2
+</p>
+</td>
+<td>
+<p>3
+</p>
 </td></tr>
 <tr>
-<th> 2
+<th>
+<p>2
+</p>
 </th>
-<td> 2 </td>
-<td> 4 </td>
-<td> 6
+<td>
+<p>2
+</p>
+</td>
+<td>
+<p>4
+</p>
+</td>
+<td>
+<p>6
+</p>
 </td></tr>
 <tr>
-<th> 3
+<th>
+<p>3
+</p>
 </th>
-<td> 3 </td>
-<td> 6 </td>
-<td> 9
+<td>
+<p>3
+</p>
+</td>
+<td>
+<p>6
+</p>
+</td>
+<td>
+<p>9
+</p>
 </td></tr>
 <tr>
-<th> 4
+<th>
+<p>4
+</p>
 </th>
-<td> 4 </td>
-<td> 8 </td>
-<td> 12
+<td>
+<p>4
+</p>
+</td>
+<td>
+<p>8
+</p>
+</td>
+<td>
+<p>12
+</p>
 </td></tr>
 <tr>
-<th> 5
+<th>
+<p>5
+</p>
 </th>
-<td> 5 </td>
-<td> 10 </td>
-<td> 15
+<td>
+<p>5
+</p>
+</td>
+<td>
+<p>10
+</p>
+</td>
+<td>
+<p>15
+</p>
 </td></tr></table>
 
 !! end
@@ -1321,16 +1394,26 @@ Table rowspan
 !! result
 <table align="right" border="1">
 <tr>
-<td> Cell 1, row 1
+<td>
+<p>Cell 1, row 1
+</p>
 </td>
-<td rowspan="2"> Cell 2, row 1 (and 2)
+<td rowspan="2">
+<p>Cell 2, row 1 (and 2)
+</p>
 </td>
-<td> Cell 3, row 1
+<td>
+<p>Cell 3, row 1
+</p>
 </td></tr>
 <tr>
-<td> Cell 1, row 2
+<td>
+<p>Cell 1, row 2
+</p>
 </td>
-<td> Cell 3, row 2
+<td>
+<p>Cell 3, row 2
+</p>
 </td></tr></table>
 
 !! end
@@ -1351,18 +1434,26 @@ Nested table
 !! result
 <table border="1">
 <tr>
-<td> &alpha;
+<td>
+<p>&alpha;
+</p>
 </td>
 <td>
 <table bgcolor="#ABCDEF" border="2">
 <tr>
-<td>nested
+<td>
+<p>nested
+</p>
 </td></tr>
 <tr>
-<td>table
+<td>
+<p>table
+</p>
 </td></tr></table>
 </td>
-<td>the original table again
+<td>
+<p>the original table again
+</p>
 </td></tr></table>
 
 !! end
@@ -1376,7 +1467,9 @@ Invalid attributes in table cell (bug 1830)
 !! result
 <table>
 <tr>
-<td>broken
+<td>
+<p>broken
+</p>
 </td></tr></table>
 
 !! end
@@ -1390,8 +1483,13 @@ Table security: embedded pipes (http://lists.wikimedia.org/mailman/htdig/wikitec
 !! result
 <table>
 <tr>
-<td>[<a href="ftp://%7Cx" class="external free" rel="nofollow">ftp://%7Cx</a></td>
-<td>]" onmouseover="alert(document.cookie)"&gt;test
+<td>
+<p>[<a href="ftp://%7Cx" class="external free" rel="nofollow">ftp://%7Cx</a>
+</p>
+</td>
+<td>
+<p>]" onmouseover="alert(document.cookie)"&gt;test
+</p>
 </td>
 </tr>
 </table>
@@ -2571,7 +2669,9 @@ BUG 553: link with two variables in a piped link
 !! result
 <table>
 <tr>
-<td>[[{{{1}}}|{{{2}}}]]
+<td>
+<p>[[{{{1}}}|{{{2}}}]]
+</p>
 </td></tr></table>
 
 !! end
@@ -2681,12 +2781,22 @@ foo {{table}}
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
+<td>
+<p>1
+</p>
+</td>
+<td>
+<p>2
+</p>
 </td></tr>
 <tr>
-<td> 3 </td>
-<td> 4
+<td>
+<p>3
+</p>
+</td>
+<td>
+<p>4
+</p>
 </td></tr></table>
 
 !! end
@@ -2701,12 +2811,22 @@ foo
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
+<td>
+<p>1
+</p>
+</td>
+<td>
+<p>2
+</p>
 </td></tr>
 <tr>
-<td> 3 </td>
-<td> 4
+<td>
+<p>3
+</p>
+</td>
+<td>
+<p>4
+</p>
 </td></tr></table>
 
 !! end
@@ -4287,7 +4407,9 @@ Table multiple attributes correction
 !! result
 <table>
 <tr>
-<th class="awesome"> status
+<th class="awesome">
+<p>status
+</p>
 </th></tr></table>
 
 !!end
@@ -4741,7 +4863,9 @@ Table attribute legitimate extension
 !! result
 <table>
 <tr>
-<th style="color:blue"> status
+<th style="color:blue">
+<p>status
+</p>
 </th></tr></table>
 
 !!end
@@ -4755,7 +4879,9 @@ Table attribute safety
 !! result
 <table>
 <tr>
-<th style="/* insecure input */"> status
+<th style="/* insecure input */">
+<p>status
+</p>
 </th></tr></table>
 
 !! end
@@ -5424,9 +5550,14 @@ noxml
 !! result
 <table>
 <tr>
-<th>https://</th>
-<th></th>
-<th></th>
+<th>
+<p>https://
+</p>
+</th>
+<th>
+</th>
+<th>
+</th>
 <th>
 </td>
 </tr>
@@ -5443,7 +5574,9 @@ Fuzz testing: Parser21
 !! result
 <table>
 <tr>
-<th> <a href="irc://{{ftp://a" class="external free" rel="nofollow">irc://{{ftp://a</a>" onmouseover="alert('hello world');"
+<th>
+<p><a href="irc://{{ftp://a" class="external free" rel="nofollow">irc://{{ftp://a</a>" onmouseover="alert('hello world');"
+</p>
 </th>
 <td>
 </td>
@@ -5489,7 +5622,9 @@ MOVE YOUR MOUSE CURSOR OVER THIS TEXT
 
 MOVE YOUR MOUSE CURSOR OVER THIS TEXT
 <tr>
-<td></u>
+<td>
+<p></u>
+</p>
 </td>
 </tr>
 </table>
@@ -7604,18 +7739,17 @@ Template:Bullet
 !!endarticle
 
 !! test
-Bug 529: Uncovered bullet
+Bug 529/12974: Uncovered bullet
 !! input
 * Foo {{bullet}}
 !! result
-<ul><li> Foo 
-</li><li> Bar
+<ul><li> Foo * Bar
 </li></ul>
 
 !! end
 
 !! test
-Bug 529: Uncovered table already at line-start
+Bug 529/12974: Uncovered table already at line-start
 !! input
 x
 
 </p>
 <table>
 <tr>
-<td> 1 </td>
-<td> 2
+<td>
+<p>1
+</p>
+</td>
+<td>
+<p>2
+</p>
 </td></tr>
 <tr>
-<td> 3 </td>
-<td> 4
+<td>
+<p>3
+</p>
+</td>
+<td>
+<p>4
+</p>
 </td></tr></table>
 <p>y
 </p>
 !! end
 
 !! test
-Bug 529: Uncovered bullet in parser function result
+Bug 529/12974: Uncovered bullet in parser function result
 !! input
 * Foo {{lc:{{bullet}} }}
 !! result
-<ul><li> Foo 
-</li><li> bar
+<ul><li> Foo * bar
 </li></ul>
 
 !! end