From: Kunal Mehta Date: Fri, 9 Feb 2018 20:10:07 +0000 (-0800) Subject: Use RemexHtml as the tidy implementation for parser tests X-Git-Tag: 1.31.0-rc.0~435^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=bd912292041cfb92e45eeffb7ca0f06cca267b98 Use RemexHtml as the tidy implementation for parser tests * RemexHtml is the future of "tidy" in MediaWiki, so run our parser tests using it. * This is a necessary step before we can make it the default in MediaWiki (T185753). * Cleaned up a bunch of tests: (a) where html/php+tidy and html/parsoid match up, retained a html+tidy section and removed the others. (b) where html/php and html/php+tidy match up, retained the html/php section and removed the html/php+tidy section. * Annotated tests with explanations where Parsoid & Remex output differ. This is usually for one of two reasons: (a) Parsoid has Tidy-emulation code in some cases (which we can consider stripping away separately). (b) Parsoid does a bunch of cleanup on the DOM (which was probably done to emulate Tidy output, but which could probably be retained). Since Parsoid (in some form) will be the default parser in the future, no reason to try to port this cleanup (in broken markup scenarios) into Remex. * Left a bunch of FIXMEs for later followup. Unrelated cleanup: * Renamed a few tests since the functionality in Parsoid was fixed up. There is no more "implicit <td>" support. Those all now lead to fostered content. * Fixed some clearly broken output in html/parsoid sections for some tests. 
Co-Authored-by: Kunal Mehta Co-Authored-by: Subramanya Sastry Bug: T188167 Depends-On: I646dbabb3c2ed28c1ea72c5bd8f7f92d03f57c75 Change-Id: Ic7c34d57a300dbd36a37f03fbfe33391b2950b44 --- diff --git a/tests/parser/TidySupport.php b/tests/parser/TidySupport.php index c0a9312ba9..559960def5 100644 --- a/tests/parser/TidySupport.php +++ b/tests/parser/TidySupport.php @@ -32,7 +32,7 @@ class TidySupport { * @param bool $useConfiguration */ public function __construct( $useConfiguration = false ) { - global $IP, $wgUseTidy, $wgTidyBin, $wgTidyInternal, $wgTidyConfig, + global $wgUseTidy, $wgTidyBin, $wgTidyInternal, $wgTidyConfig, $wgTidyConf, $wgTidyOpts; $this->enabled = true; @@ -55,26 +55,7 @@ class TidySupport { $this->enabled = false; } } else { - $this->config = [ - 'tidyConfigFile' => "$IP/includes/tidy/tidy.conf", - 'tidyCommandLine' => '', - ]; - if ( extension_loaded( 'tidy' ) && ( wfIsHHVM() || class_exists( 'tidy' ) ) ) { - $this->config['driver'] = wfIsHHVM() ? 'RaggettInternalHHVM' : 'RaggettInternalPHP'; - } else { - if ( is_executable( $wgTidyBin ) ) { - $this->config['driver'] = 'RaggettExternal'; - $this->config['tidyBin'] = $wgTidyBin; - } else { - $path = ExecutableFinder::findInDefaultPaths( $wgTidyBin ); - if ( $path !== false ) { - $this->config['driver'] = 'RaggettExternal'; - $this->config['tidyBin'] = $wgTidyBin; - } else { - $this->enabled = false; - } - } - } + $this->config = [ 'driver' => 'RemexHtml' ]; } if ( !$this->enabled ) { $this->config = [ 'driver' => 'disabled' ]; diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index e6fa203419..ad69b9fb9b 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -572,13 +572,6 @@ http://fr.wikipedia.org/wiki/🍺

!! end -# Note that the html+tidy output removes the spaces after the
  • , -# which is a bug (https://sourceforge.net/p/tidy/bugs/945/, etc). -# This is an issue for all tests with lists. We intentionally do -# *not* add html+tidy clauses for these, as we don't want to -# document/test the broken behavior. (Parsoid matches the non-tidy -# output in these cases.) - !! test Simple list !! wikitext @@ -1149,8 +1142,7 @@ The ''[[Main Page]]'''s talk page. !! end !! test -Parsoid only: Quote balancing context should be restricted to td/th cells on the same wikitext line -(Requires tidy for PHP parser output to be fixed up) +Quote balancing context should be restricted to td/th cells on the same wikitext line !! options parsoid=wt2html,wt2wt !! wikitext @@ -1158,20 +1150,15 @@ parsoid=wt2html,wt2wt !''a!!''b |''a||''b |} -!! html/php+tidy +!! html+tidy - + - + - - -
    abb + ab
    -!! html/parsoid - - - -
    abab
    +b + !! end ### @@ -1334,11 +1321,8 @@ Non-word characters don't terminate tag names (T19663, T42670, T54022)

    !! end -# There is a tidy bug here: https://sourceforge.net/p/tidy/bugs/946/ -# If the non-word-character tag made it through the sanitizer, tidy -# would munge it up. !! test -Non-word characters don't terminate tag names + tidy +Non-word characters don't terminate tag names !! wikitext a @@ -1352,12 +1336,13 @@ Non-word characters don't terminate tag names + tidy !! html+tidy -

    <blockquote|>a

    -

    <b→> doesn't terminate </b→>

    -

    <bä> doesn't terminate </bä>

    -

    <boo> doesn't terminate </boo>

    -

    <s.foo> doesn't terminate </s.foo>

    -

    <sub-ID#1>

    +

    <blockquote|>a +

    <b→> doesn't terminate </b→> +

    <bä> doesn't terminate </bä> +

    <boo> doesn't terminate </boo> +

    <s.foo> doesn't terminate </s.foo> +

    <sub-ID#1> +

    !! end ### @@ -1390,7 +1375,9 @@ parsoid=wt2html s !! html/php+tidy -

    <s.foo>s

    +

    +

    <s.foo>s +

    !! html/parsoid

    <s.foo>s

    !! end @@ -1518,7 +1505,8 @@ Entities inside template parameters !! wikitext {{echo|–}} !! html/php+tidy -

    –

    +

    – +

    !! html/parsoid

    !! end @@ -1911,21 +1899,23 @@ a
    foo

    b

    !! html+tidy -

    a

    -
    foo
    -

    b

    +

    a

    foo
    +

    b +

    !! end +# Remex wraps empty tag runs with p-tags. +# Parsoid strips them out during p-wrapping. !! test No p-wrappable content !! wikitext
    x
    x
    x
    -!! html+tidy -
    x
    -
    x
    -
    x
    +!! html/php+tidy +
    x
    +
    x
    +

    x
    !! html/parsoid
    x
    x
    @@ -1954,11 +1944,9 @@ a
    foo

    b

    !! html+tidy -

    a

    -
    -

    foo

    -
    -

    b

    +

    a

    foo

    +

    b +

    !! end !! test @@ -1972,10 +1960,8 @@ a
    foo
    b
    foo
    !! html+tidy -

    a

    -
    foo
    -

    b

    -
    foo
    +

    a

    foo

    +b

    foo
    !! end !! test @@ -1989,14 +1975,8 @@ a
    foo
    b
    foo
    !! html+tidy -

    a

    -
    -

    foo

    -
    -

    b

    -
    -

    foo

    -
    +

    a

    foo

    +b

    foo

    !! end !! test @@ -2016,19 +1996,21 @@ d e x
    foo
    z !! html+tidy -
    foo
    -

    a

    -

    b c d e

    -

    x

    -
    foo
    -

    z

    +
    foo

    a +

    b +c +d e +

    +x

    foo

    z +

    !! end -# Tidy strips out the empty
    tags. Parsoid doesn't. -# So, we have a separate section for Parsoid. We don't want -# to mimic this stripping behavior in Parsoid. It affects -# editing experience and also requires us to maintain additional -# info for RT-ing. +# The difference between Parsoid & Remex here +# is because of Parsoid's Tidy-emulation code +# for p-wrapping. We'll start work to remove this +# emulation code in Parsoid sooner than later. +# Remex wraps empty tag runs with p-tags. +# Parsoid strips them out in a separate pass. !! test Empty lines between lines with block tags !! wikitext @@ -2058,14 +2040,16 @@ b
    e
    !! html+tidy -


    -

    a

    -

    b

    -
    a
    -

    b

    -
    b
    -

    d

    -


    +
    +


    +

    +

    a +

    b +

    +
    a

    b +

    b

    d +


    +

    e
    !! html/parsoid
    @@ -2082,7 +2066,6 @@ b
    e
    !! end -## PHP parser emits output which is broken !! test Unclosed HTML p-tags should be handled properly !! wikitext @@ -2091,11 +2074,10 @@ a b !! html/php+tidy -
    -

    foo

    -
    -

    a

    -

    b

    +

    foo

    +

    a +

    b +

    !! html/parsoid

    foo

    a

    @@ -2276,9 +2258,6 @@ Foo bar baz quux !! end -# Note that the p-wrapping is newline sensitive, which could be -# considered a bug: tidy will wrap only the 'Foo' in the example -# below in a

    tag. (see comment 23-25 of T8200) !! test T17491: / in blockquote (2) !! wikitext @@ -2289,9 +2268,8 @@ T17491: / in blockquote (2) !! html+tidy -

    -

    Foo

    -bar baz quux
    +

    Foo

    bar baz

    quux +

    !! end !! test @@ -2426,7 +2404,6 @@ parsoid=wt2html

    !! end -# Parsoid doesn't strip empty tags, like Tidy does. !! test Empty pre; pre inside other HTML tags (T56946) !! wikitext @@ -2436,20 +2413,12 @@ a foo
    
    -!! html/php
    +!! html/php+tidy
     

    a

    -
    -foo
    +
    foo
     
    
    -
    -!! html/php+tidy
    -

    a

    -
    -
    -foo
    -
    !! html/parsoid

    a

    @@ -2469,16 +2438,12 @@ HTML pre followed by indent-pre
    !! end -# Note that tidy removes the empty

    tags from the start and end. -# Parsoid does not, by design. !! test Block tag pre !! wikitext

    foo

    !! html/php+tidy -
    -foo
    -
    +

    foo

    !! html/parsoid

    foo

    !! end @@ -2641,10 +2606,8 @@ parsoid=wt2html <pre
    !! html/php+tidy -
    -x
    -
    -

    <pre

    +
    x
    +<pre
    !! html/parsoid
    x
    @@ -2938,7 +2901,8 @@ Templates: Parsoid parameter escaping test 1 !! wikitext {{echo|[foo]|{{echo|[bar]}}}} !! html/php+tidy -

    [foo]

    +

    [foo] +

    !! html/parsoid

    [foo]

    @@ -2949,7 +2913,8 @@ Parsoid: Pipes in external links in template parameter !! wikitext {{echo|[{{echo|http://example.com}} link]}} !! html/php+tidy -

    link

    +

    link +

    !! html/parsoid

    link

    !! end @@ -2959,7 +2924,8 @@ Parsoid: pipe in transclusion parameter !! wikitext {{echo|http://foo.com/a|b}} !! html/php+tidy -

    http://foo.com/a%7Cb

    +

    http://foo.com/a%7Cb +

    !! html/parsoid

    a|b

    +

    a|b +

    !! html/parsoid

    <div>}} {{echo|}} !! html/php+tidy -

    foo|bar <div>

    +

    foo|bar +<div> + +

    !! html/parsoid

    foo|bar <div> @@ -3016,7 +2986,8 @@ parsoid=html2wt,wt2wt !! wikitext {{echo|{{echo|1=bar}}}} !! html/php+tidy -

    bar

    +

    bar +

    !! html/parsoid

    bar

    !! end @@ -3027,7 +2998,8 @@ Templates parameters with special tokenizing behavior dont get modified because !! wikitext {{echo|a : b}} !! html/php+tidy -

    a : b

    +

    a : b +

    !! html/parsoid

    a : b

    !! end @@ -3038,7 +3010,8 @@ Templates: Preserve blank parameter names !! wikitext {{echo|=foo}} !! html/php+tidy -

    {{{1}}}

    +

    {{{1}}} +

    !! html/parsoid

    {{{1}}}

    !! end @@ -3048,7 +3021,9 @@ Templates: Preserve blank parameter names in other positions !! wikitext {{blank_param|bar|=foo}} !! html/php+tidy -

    bar foo

    +

    bar +foo +

    !! html/parsoid

    bar foo

    @@ -3260,17 +3235,11 @@ a

    c

    foo
     foo 
     
    -!! html+tidy -

    a

    -

    foo

    -

    b

    -
    foo
    -

    c

    -
    -

    foo

    -
    -
    - foo 
    +!! html/php+tidy
    +

    a

    foo

    + b

    foo

    + c

    foo

    +
     foo 
     
    !! end @@ -3287,12 +3256,10 @@ a !! html/parsoid
    a foo
    b
    foo
    -!! html+tidy -
    -a foo
    -
    -

    b

    -
    foo
    +!! html/php+tidy +
    a foo
    +

    + b

    foo
    !!end !!test @@ -3527,7 +3494,8 @@ a b !! html/php+tidy -

    a b

    +

    a b +

    !! html/parsoid

    a @@ -7164,7 +6966,7 @@ Wikitext table with html-syntax row !! end !! test -Implicit after a |- +Fostered content in tables: Plain text !! options parsoid=wt2html,html2html !! wikitext @@ -7179,7 +6981,10 @@ a !! html/php+tidy -

    a

    + + +a +
    !! html/parsoid

    a

    @@ -7188,7 +6993,7 @@ a !! end !! test -Lists should be recognized in an implicit
    context +Fostered content in tables: Lists !! options parsoid=wt2html,html2html !! wikitext @@ -7203,9 +7008,10 @@ parsoid=wt2html,html2html
    !! html/php+tidy -
      -
    • a
    • -
    +
    • a
    + + +
    !! html/parsoid
    • a
    @@ -7214,7 +7020,7 @@ parsoid=wt2html,html2html !! end !! test -Table cells not properly parsed in an implicit-td context +Template generated table cell with attributes !! wikitext {| |- @@ -7222,12 +7028,12 @@ Table cells not properly parsed in an implicit-td context |} !! html/php+tidy
    - - - - - -
    ab
    + + + + a + b + !! html/parsoid @@ -7248,17 +7054,14 @@ parsoid=wt2html,wt2wt |}quux !! html+tidy
    - - - -
    foo
    -

    bar

    - - - - -
    baz
    -

    quux

    + +foo +

    bar +

    + +
    baz +

    quux +

    !! end !! test @@ -7330,12 +7133,11 @@ foo |} !!html/php+tidy - + - -
    -

    foo

    -
    +

    foo +

    + !!html/parsoid @@ -7409,24 +7211,28 @@ parsoid=html2wt |} !! html/php+tidy
    - - - - - + + + + - - - + + - - - + + - - - -
    Test
    MonthSavings
    Test +
    Month +Savings +
    January$100
    January +$100 +
    February$80
    February +$80 +
    Sum$180
    +Sum + +$180 + !! end # T137406: No whitespace in the HTML @@ -8239,11 +8045,9 @@ T2337: Escaped self-links should be bold title=[[Bug462]] !! wikitext [[Bug462]] [[Bug462]] -!! html/php +!! html/php+tidy

    Bug462 Bug462

    -!! html/php+tidy -

    Bug462 Bug462

    !! html/parsoid

    Bug462 Bug462

    !! end @@ -8684,7 +8488,7 @@ parsoid=wt2html,wt2wt -!! html+tidy +!! html/php+tidy
    • Wikipedia:ro:OlteniÅ£a
    • Wikipedia:ro:OlteniÅ£a
    • @@ -9475,8 +9279,9 @@ parsoid=wt2html
      !! html+tidy -


      -


      +


      +


      +

      !! end !! test @@ -9568,10 +9373,9 @@ foo
      bar !! html+tidy
      -
      -

      foo

      -
      -

      bar

      +

      +foo


      bar +

      !! end !! test @@ -9620,8 +9424,8 @@ Horizontal ruler -- Supports content following dashes on same line
      Foo !! html+tidy -
      -

      Foo

      +

      Foo +

      !! end ### @@ -9922,11 +9726,10 @@ Multiple list tags generated by templates !! html+tidy -
        -
      • a
      • -
      • b
      • -
      • c
      • -
      +
    • a +
    • b +
    • c +
    • !!end !!test @@ -9966,9 +9769,10 @@ Replacing whitespace with tabs still doesn't break the list (gerrit 78327) !!end +# FIXME: Parsoid has a dedicated DOM pass to mimic this Tidy-specific li-hack +# That pass could possibly be removed. !!test -Test the li-hack -(The PHP parser relies on Tidy for the hack) +Test the li-hack (a hack from Tidy days, but doesn't work as advertised with Remex) !!options parsoid=wt2html,wt2wt !! wikitext @@ -9982,14 +9786,13 @@ parsoid=wt2html,wt2wt
    !! html+tidy +
    • foo
    • +
    • li-hack
    • +
    • templated li-hack
    • +
    • unsupported li-hack with preceding comments
      -
    • foo
    • -
    • li-hack
    • -
    • templated li-hack
    • -
    • unsupported li-hack with preceding comments
    • -
    -
      -
    • not a li-hack
    • +
    • not a li-hack +
    !! html/parsoid
    • foo
    • @@ -10048,69 +9851,71 @@ parsoid # tags (parse, minimize scope of fixup, and roundtrip back) # ------------------------------------------------------------------------ +# Remex and Parsoid output stems from list handling diffs because Parsoid & PHP parser. +# Parsoid's list handling is more aware of block structure. !! test Unbalanced closing block tags break a list -(php parser relies on Tidy to fix up) !! wikitext
      *a
      *b
      !! html+tidy
      -
        +
        • a
      +
    • b
    • +!! html+parsoid +
      • a
      • -
      -
      -
      -
        +
      +
      • b
      • -
      -
      +
    !! end -# Parsoid fails this test, but it might be tricky to support properly. -# See T70395. !! test Unbalanced closing non-block tags don't break a list -(php parser relies on Tidy to fix up) !! wikitext *a *b !! html/php+tidy -
      -
    • a
    • -
    • b
    • -
    +

    +

    +
    • a
    • +
    • b
    !! html/parsoid
      -
    • a -
    • -
    • b -
    • +
    • a
    • +
    • b
    !! end +# Parsoid does some post-dom-building cleanup +# which is why its output differs from Remex. !! test Unclosed formatting tags that straddle lists are closed and reopened -(php parser relies on Tidy to fix up) !! options parsoid=wt2html,wt2wt,html2html !! wikitext # a # b !! html/php+tidy -
      -
    1. a
    2. -
    3. b
    4. -
    +
    1. a
    2. +
    3. b
    !! html/parsoid -
    1. a
    2. +
      1. a
      2. b
      !! end +# Output is ugly because of all the misnested tag fixups. +# Remex is wrapping p-tags around empty elements. +# Parsoid has special-case handling of this pattern of +# wrapping lists in formatting tags. +# FIXME: Should we remove this code from Parsoid? Or add +# special support in Remex? If the latter, maybe just wait +# for Parsoid to become the default parser. # See T70395. !!test 1. List embedded in a formatting tag @@ -10119,9 +9924,9 @@ parsoid=wt2html,wt2wt,html2html * foo !! html/php+tidy -
        -
      • foo
      • -
      +

      +

      • foo

      +

      !! html/parsoid
        @@ -10130,19 +9935,19 @@ parsoid=wt2html,wt2wt,html2html !!end -## Ugly Parsoid output here -## Not sure what the right output is. +# Output is ugly because of all the misnested tag fixups +# Remex is wrapping p-tags around empty elements. +# Parsoid has code that strips useless p-tags. !!test -2. List embedded in a formatting tag +2. List embedded in a formatting tag in a misnested way !! wikitext *a *b !! html/php+tidy -
          -
        • a
        • -
        • b
        • -
        +

        +

        • a
        • +
        • b
        !! html/parsoid
          @@ -10152,31 +9957,6 @@ parsoid=wt2html,wt2wt,html2html
        !!end -# Ugly Parsoid and PHP parser output here -# Not sure if we want to make this a test! -# -## !!test -## 3. Unclosed formatting tags in list elements -## !! wikitext -## *a -## *b -## !! html/php+tidy -##
          -##
        • a
        • -##
        • b
        • -##
        -## !! html/parsoid -##
          -##
        • a
        • -## -##
        • b
        • -##
        -## !!end - -# This is a bug in the PHP parser + tidy combination. -# (The tag gets parsed as text and html-escaped by PHP, -# and then fostered out of the table by tidy.) -# We believe the Parsoid output to be correct. !! test Table with missing opening tag !! options @@ -10188,10 +9968,9 @@ parsoid=wt2html,wt2wt !! html+tidy - - + -
        foo
        foo
        + !! end ### @@ -10669,11 +10448,9 @@ title=[['foo & bar = baz']] parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true } !! wikitext ''{{PAGENAME}}'' -!! html/php +!! html+tidy

        'foo & bar = baz'

        -!! html+tidy -

        'foo & bar = baz'

        !! end !! test @@ -10683,11 +10460,9 @@ title=[[*RFC 1234 http://example.com/]] parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true } !! wikitext {{PAGENAME}} -!! html/php +!! html+tidy

        *RFC 1234 http://example.com/

        -!! html+tidy -

        *RFC 1234 http://example.com/

        !! end !! test @@ -10709,11 +10484,9 @@ title=[[*RFC 1234 http://example.com/]] parsoid={ "modes": ["wt2html","wt2wt"], "normalizePhp": true } !! wikitext {{PAGENAMEE}} -!! html/php +!! html+tidy

        *RFC_1234_http://example.com/

        -!! html+tidy -

        *RFC_1234_http://example.com/

        !! end !! test @@ -11474,12 +11247,7 @@ Template with thumb image (with link in description) This is a test template with parameter !! html+tidy -

        This is a test template with parameter

        - +

        This is a test template with parameter

        !! html/parsoid

        This is a test template with parameter

        link caption
        !! end @@ -11763,21 +11531,13 @@ Templates with intersecting and overlapping ranges {{echo|{{!}}hi}} |} !! html/php+tidy -

        ha

        -

        ho

        - - - - - - - -
        hi
        - - - - -
        +

        ha

        + +

        ho

        + + +
        hi +
        !! html/parsoid

        ha

        @@ -12486,30 +12246,21 @@ tag, nothing bad happens: {{echo|foo-{bar}bat}} !! html/php+tidy -

        This form breaks the template, which is unfortunate:

        -
          -
        • {{echo|foo-{bar}bat}}
        • -
        -

        But if the broken language converter markup is inside an extension tag, nothing bad happens:

        -
          -
        • foo-{bar}bat
        • -
        • foo-{bar}bat
        • -
        • -
          -foo-{bar}bat
          -
        • -
        • -
          -foo-{bar}bat
          -
        • -
        -
        -'foo-{bar}bat'
        +

        This form breaks the template, which is unfortunate: +

        +
        • {{echo|foo-{bar}bat}}
        +

        But if the broken language converter markup is inside an extension +tag, nothing bad happens: +

        +
        • foo-{bar}bat
        • +
        • foo-{bar}bat
        • +
        • foo-{bar}bat
        • +
        • foo-{bar}bat
        +
        'foo-{bar}bat'
         array (
         )
         
        -
        -'foo-{bar}bat'
        +
        'foo-{bar}bat'
         array (
         )
         
        @@ -12537,11 +12288,9 @@ parsoid=wt2html * [http://example.com Example in -{link} description] * {{echo|[http://example.com/-{foo Breaks template, however]}} !! html/php+tidy - + !! html/parsoid
        • Example in URL
        • @@ -12557,11 +12306,9 @@ Preprocessor precedence 14: broken language converter in comment * ...extra dashes * {{echo|foobat}} ...should be ok !! html/php+tidy -
            -
          • ...should be ok
          • -
          • ...extra dashes
          • -
          • foobat ...should be ok
          • -
          +
          • ...should be ok
          • +
          • ...extra dashes
          • +
          • foobat ...should be ok
          !! html/parsoid
          • ...should be ok
          • @@ -12592,17 +12339,23 @@ __NOTOC__ __NOEDITSECTION__ 6 !! html/php+tidy

            1 foo[bar 1

            -

            1

            +

            1 +

            2 foo[[bar 2

            -

            2

            +

            2 +

            3 foo{bar 3

            -

            3

            +

            3 +

            4 foo{{bar 4

            -

            4

            +

            4 +

            5 foo{{{bar 5

            -

            5

            +

            5 +

            6 foo-{bar 6

            -

            6

            +

            6 +

            !! html/parsoid

            1 foo[bar 1

            @@ -12775,9 +12528,7 @@ Templates: 2. Inside a block tag !! html+tidy
            Foo
            -
            -

            Foo

            -
            +

            Foo

            !!end !!test @@ -12816,9 +12567,9 @@ Templates: P-wrapping: 1c. Templates on consecutive lines bar
            baz
            !! html+tidy -

            Foo

            -

            bar

            -
            baz
            +

            Foo +

            +bar

            baz
            !! end !!test @@ -13242,12 +12993,9 @@ a
        !! html+tidy -

        a

        - - - - -
        + +a +
        !! end !!test @@ -13266,14 +13014,18 @@ foo -!! html+tidy +!! html/php+tidy
        +

        foo +

        +
        + +
        +!! html/parsoid +

        foo

        -
        - - - - +
        +
        !! end @@ -13290,13 +13042,15 @@ a
        b
        -!! html+tidy -

        a

        -
        b
        - - - - +!! html/php+tidy + +a +
        b
        +
        +!! html/parsoid +

        a

        b
        + +
        !! end @@ -13445,9 +13199,8 @@ a
        b{{echo|c
        d}}e a
        bc
        de !! html+tidy -

        a

        -
        bc
        -

        de

        +

        a

        bc

        de +

        !! end !!test @@ -14545,18 +14298,11 @@ thumbsize=220 123
        Foobar.jpg
        456 !! html/php+tidy -

        123Foobar.jpg456

        -

        123

        -
        Foobar.jpg
        -

        456 123

        -
        -
        Foobar.jpg -
        -
        -
        -
        -
        -

        456

        +

        123Foobar.jpg456 +

        +123

        Foobar.jpg

        456 +123

        Foobar.jpg

        456 +

        !! html/parsoid

        123456

        123

        456

        @@ -17464,11 +17210,9 @@ Remember AT&T? text with character entity: eacute !! wikitext I always thought é was a cute letter. -!! html +!! html+tidy

        I always thought é was a cute letter.

        -!! html+tidy -

        I always thought é was a cute letter.

        !! end !! test @@ -17546,12 +17290,11 @@ Ensure that HTML adoption agency algorithm is properly implemented. !! end # This was T43545 in the PHP parser. -# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext XYZ -!! html +!! html+tidy

        XYZ

        !! end @@ -17560,22 +17303,20 @@ Nesting of # Note that there are some other nestable tags (b, i, etc) which are # not covered; see T53081 for discussion. -# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext XYZ -!! html +!! html+tidy

        XYZ

        !! end -# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext XYZ -!! html +!! html+tidy

        XYZ

        !! end @@ -17585,10 +17326,10 @@ Nesting of !! wikitext XYZ !! html+tidy -

        XYZ

        +

        XYZ +

        !! end -# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -17598,7 +17339,6 @@ Nesting of

        !! end -# Note that tidy doesn't handle this correctly. !! test Nesting of !! wikitext @@ -17643,6 +17383,7 @@ Media link with text # FIXME: this is still bad HTML tag nesting # FIXME: doBlockLevels won't wrap this in a paragraph because it contains a div +# Parsoid & Remex fix the p-wrapping since they operate on the DOM. !! test Media link with nasty text !! wikitext @@ -18256,11 +17997,12 @@ Expansion of multi-line templates in attribute values (T8255 sanity check 2) !! end !! test -Tags which are hidden from Tidy cannot pass through the Sanitizer +Tags which are hidden from tidiers cannot pass through the Sanitizer !! wikitext !! html+tidy -

        <mw:toc><script>alert();</script></mw:toc>

        +

        <mw:toc><script>alert();</script></mw:toc> +

        !! end ### @@ -18565,14 +18307,14 @@ Nested template calls ### Sanitizer ### -# HTML+Tidy strips out empty tags completely. Parsoid doesn't. -# FIXME: Wikitext for this first test doesn't match its title. +# Remex wraps empty tag runs with p-tags. +# Parsoid strips them out during p-wrapping. !! test Sanitizer: Closing of open tags !! wikitext
        !! html/php+tidy - +

        !! html/parsoid
        !! end @@ -18593,6 +18335,8 @@ parsoid=wt2html !! wikitext !! html/php+tidy +

        +

        !! html/parsoid !! end @@ -18602,10 +18346,9 @@ Sanitizer: Closing of closed but not open table tags parsoid=wt2html !! wikitext Table not started -!! html/php+tidy -

        Table not started

        -!! html/parsoid -

        Table not started

        +!! html+tidy +

        Table not started +

        !! end !! test @@ -18776,10 +18519,6 @@ Self closed html pairs (T7487)
        In div text
        !! end -# -# -# - !! test Punctuation: nbsp before exclamation !! wikitext @@ -18837,9 +18576,9 @@ HTML bullet list, unclosed tags (T7497)
      !! html/php+tidy
        -
      • One
      • -
      • Two
      • -
      +
    3. One +
    4. Two +
    5. !! html/parsoid
      • One
      • @@ -18879,9 +18618,9 @@ HTML ordered list, unclosed tags (T7497)
    !! html/php+tidy
      -
    1. One
    2. -
    3. Two
    4. -
    +
  • One +
  • Two +
  • !! html/parsoid
    1. One
    2. @@ -18936,30 +18675,15 @@ HTML nested bullet list, open tags (T7497)
    3. Sub-two -!! html/php+tidy -
        -
      • One
      • -
      • Two: -
          -
        • Sub-one
        • -
        • Sub-two
        • -
        -
      • -
      -!! html/parsoid +!! html+tidy
      • One -
      • -
      • Two: +
      • Two:
        • Sub-one -
        • -
        • Sub-two -
        • -
        -
      • -
      - +
    4. Sub-two +
    5. + !! end !! test @@ -19102,17 +18826,12 @@ http://

      C !! html/php+tidy -

      onmouseover=[edit]

      -

      http://

      -
      -
      -

      Contents

      -
      +

      onmouseover=[edit]

      +http://

      Contents

      -

      !! html/parsoid

      onmouseover=

      http://__TOC__

      @@ -19144,31 +18863,22 @@ parsoid=wt2html,html2html !! end # Known to produce bogus xml (extra ) +# Don't add the html/php section since it generates broken HTML !! test Fuzz testing: Parser16 !! wikitext {| !https://|||||| -!! html +!! html+tidy - + -
      https:// - -
      -!! html+tidy - - - - - - - -
      https://
      + + !! end !! test @@ -19295,10 +19005,7 @@ http://example.com
      junk
      http://example.com
      junk
      !! html/php+tidy -

      http://example.com

      -
      -junk
      -
      +

      http://example.com

      junk
      !! html/parsoid

      http://example.com

      junk
      !! end @@ -21156,8 +20863,6 @@ parsoid=wt2html,wt2wt,html2html !! html/php

      JavaScript

      -!! html/php+tidy -

      JavaScript

      !! html/parsoid

      JavaScript

      !! end @@ -21166,11 +20871,9 @@ parsoid=wt2html,wt2wt,html2html HTML Hex character encoding bogus encoding (T28437 regression check) !! wikitext &#xsee;&#XSEE; -!! html/php +!! html

      &#xsee;&#XSEE;

      -!! html/parsoid -

      &#xsee;&#XSEE;

      !! end !! test @@ -21182,8 +20885,6 @@ parsoid=wt2html,wt2wt,html2html !! html/php

      îî

      -!! html/php+tidy -

      îî

      !! html/parsoid

      îî

      !! end @@ -21202,22 +20903,20 @@ Illegal character references (T106578) ; Surrogate: �� ; This is an okay astral character: 💩 !! html+tidy -
      -
      Null
      -
      &#00;
      -
      FF
      -
      &#xC;
      -
      CR
      -
      &#xD;
      -
      Control (low)
      -
      &#8;
      -
      Control (high)
      -
      &#x7F; &#x9F;
      -
      Surrogate
      -
      &#xD83D;&#xDCA9;
      -
      This is an okay astral character
      -
      💩
      -
      +
      Null
      +
      &#00;
      +
      FF
      +
      &#xC;
      +
      CR
      +
      &#xD;
      +
      Control (low)
      +
      &#8;
      +
      Control (high)
      +
      &#x7F; &#x9F;
      +
      Surrogate
      +
      &#xD83D;&#xDCA9;
      +
      This is an okay astral character
      +
      💩
      !! end !! test @@ -21234,11 +20933,9 @@ __FORCETOC__ ISBN code coverage !! wikitext ISBN 978-0-1234-56 789 -!! html +!! html/php

      ISBN 978-0-1234-56 789

      -!! html+tidy -

      ISBN 978-0-1234-56 789

      !! html/parsoid

      ISBN 978-0-1234-56 789

      !! end @@ -21334,11 +21031,11 @@ RFC [[RFC 1234]] RFC code coverage !! wikitext RFC 983 987 -!! html +!! html/php

      RFC 983 987

      -!! html+tidy -

      RFC 983 987

      +!! html/parsoid +

      RFC 983 987

      !! end !! test @@ -22702,10 +22399,10 @@ language=zh variant=zh-cn a-{H|0=>zh-cn:xy;0=>zh-tw:b
      c}-d !! html/php+tidy -

      ab

      -
      cd ab -
      cd ad
      -
      +ab
      cd +ab
      cd +ad +
      !! html/parsoid

      a

      d

      @@ -22991,10 +22688,9 @@ language=zh variant=zh-cn ;foo:bar ;-{zh-cn:AAA !! html/php+tidy -
      -
      foo:bar
      -
      -{zh-cn:AAA
      -
      +
      foo:bar
      +
      -{zh-cn:AAA

      +

      !! html/parsoid
      foo:bar
      -{zh-cn
      @@ -23171,14 +22867,8 @@ T2529: Uncovered bullet !! end -# Plain MediaWiki does not remove empty lists, but tidy actually does. -# Templates in Wikipedia rely on this behavior, as tidy has always been -# enabled there. These tests are normally run *without* tidy, so specify the -# full output here. -# To test realistic parsing behavior, apply a tidy-like transformation to both -# the expected output and your parser's output. !! test -T2529: Uncovered bullet leaving empty list, normally removed by tidy +T2529: Uncovered bullet in a deeply nested list !! wikitext ******* Foo {{bullet}} !! html @@ -23369,14 +23059,17 @@ Line two !! end +# doBlockLevels screws up this output and Remex cleans up as much as it can. +# Parsoid seems to do a better job here since its p-wrapper is probably smarter. !! test Nesting tags, paragraphs on lines which begin with
      !! wikitext
      A B !! html/php+tidy -

      A

      -

      B

      +

      A +

      B +

      !! html/parsoid

      A @@ -23396,9 +23089,8 @@ Line two Line two !! html+tidy -

      -

      Line one Line two

      -
      +

      Line one +Line two

      !! end !! test @@ -23416,8 +23108,9 @@ Line two !! html+tidy
      -

      Line one

      -Line two
      +

      Line one +

      +Line two

      !! end !! test @@ -23434,9 +23127,9 @@ Line two !! html+tidy -
      -

      Line one

      -

      Line two

      +

      Line one +

      Line two +

      !! end @@ -23455,13 +23148,9 @@ Line two

      -!! html+tidy -
      -

      Line one

      -

      Line two

      -
      !! end +# FIXME: Why does/should the blockquote+div combo suppress p-wrapping here? !! test Paragraphs inside blockquotes/divs (no extra line breaks) !! wikitext @@ -24369,21 +24058,13 @@ __TOC__

      Quote
      [edit]

      !! html/php+tidy -

      -
      -
      -

      Contents

      -
      +

      Contents

      -

      -

      -
      -

      Quote

      -
      -

      [edit]

      + +

      Quote

      [edit]

      !! html/parsoid

      Quote

      @@ -24437,23 +24118,15 @@ __TOC__

      Foo
      Bar
      [edit]

      !! html/php+tidy -

      -
      -
      -

      Contents

      -
      + -

      +

      Foo Bar[edit]

      -

      Foo

      -
      -

      Bar

      -
      -

      [edit]

      +

      Foo

      Bar

      [edit]

      !! html/parsoid

      Foo Bar

      @@ -24567,9 +24240,6 @@ __TOC__

      test test test

      !! end -# Note that the html output does not have the

      , but the -# html+tidy output *does*. This is because the empty

      is -# removed by the sanitizer, but only when tidy is *not* enabled (!). !! test Empty

      tag in TOC, removed by Sanitizer (T92892) !! wikitext @@ -24584,18 +24254,6 @@ __TOC__

      x[edit]

      -!! html/php+tidy -

      -
      -
      -

      Contents

      -
      - -
      -

      -

      x[edit]

      !! html/parsoid

      x

      @@ -25669,15 +25327,12 @@ parsoid=html2wt |} !! html/php+tidy + + - - - - - -
      foo|bar +
      foo|bar
      x -
      a|b
      -
      +x
      a|b
      + !! end !! test @@ -26786,8 +26441,8 @@ parsoid=wt2html,html2html !! wikitext
      -

      Foo

      +

      Foo +

      !! html/parsoid

      Foo

      !! end @@ -26929,11 +26584,9 @@ RT-ed inter-element separators should be valid separators !!end -# Parsoid-only since PHP parser relies on Tidy for correct output +# Parsoid-only test of a DOM pass !!test Trailing newlines in a deep dom-subtree that ends a wikitext line should be migrated out -!!options -parsoid !! wikitext {| |foo @@ -26943,7 +26596,7 @@ bar {| |foo |} -!! html +!! html/parsoid @@ -28583,8 +28236,15 @@ Magic links inside links (not autolinked) [http://foo.com PMID 1234] [http://foo.com ISBN 123456789x] !! html+tidy -

      http://example.com RFC 1234 PMID 1234 ISBN 123456789x

      -

      http://example.com RFC 1234 PMID 1234 ISBN 123456789x

      +

      http://example.com +RFC 1234 +PMID 1234 +ISBN 123456789x +

      http://example.com +RFC 1234 +PMID 1234 +ISBN 123456789x +

      !! html/parsoid

      http://example.com RFC 1234 @@ -28605,34 +28265,10 @@ Magic links inside image captions (autolinked) [[File:Foobar.jpg|thumb|PMID 1234]] [[File:Foobar.jpg|thumb|ISBN 123456789x]] !! html+tidy -

      - -
      -
      -
      - -
      -
      -
      -
      - -
      -
      -
      - -
      + + + + !! html/parsoid
      http://example.com
      RFC 1234
      @@ -28706,24 +28342,27 @@ parsoid=html2wt,wt2wt |} !! html/php+tidy
      foo

      bar

      + + + - - - - - - - + + - - - -
      - +- +
      --
      --
      - +- +
      -
      -

      -

      +
      -
      -

      -

      +

      - +

      +
      +

      - +

      + !! end !! test @@ -29865,27 +29504,20 @@ Empty LI (T49673) * * * b -!! html/php+tidy -
        -
      • a
      • -
      • +!! html+tidy +
        • a
        • +
        • -
        • b
        • -
        +
      • b
      !! end +# FIXME: Why is there no html/parsoid section here? !! test Thumbnail output !! wikitext [[File:Thumb.png|thumb]] !! html/php+tidy -
      -
      Thumb.png -
      -
      -
      -
      -
      +
      Thumb.png
      !! end !! test