tests/phpunit/includes/tidy/BalancerTest.php

   1 <?php
   2
   3 class BalancerTest extends MediaWikiTestCase {
   4
   5         /**
   6          * Anything that needs to happen before your tests should go here.
   7          */
   8         protected function setUp() {
   9                 // Be sure to do call the parent setup and teardown functions.
  10                 // This makes sure that all the various cleanup and restorations
  11                 // happen as they should (including the restoration for setMwGlobals).
  12                 parent::setUp();
  13         }
  14
  15         /**
  16          * @covers MediaWiki\Tidy\Balancer::balance
  17          * @dataProvider provideBalancerTests
  18          */
  19         public function testBalancer( $description, $input, $expected, $useTidy ) {
  20                 $balancer = new MediaWiki\Tidy\Balancer( [
  21                         'strict' => false, /* not strict */
  22                         'allowedHtmlElements' => null, /* no sanitization */
  23                         'tidyCompat' => $useTidy, /* standard parser */
  24                         'allowComments' => true, /* comment parsing */
  25                 ] );
  26                 $output = $balancer->balance( $input );
  27
  28                 // Ignore self-closing tags
  29                 $output = preg_replace( '/\s*\/>/', '>', $output );
  30
  31                 $this->assertEquals( $expected, $output, $description );
  32         }
  33
  34         public static function provideBalancerTests() {
  35                 // Get the tests from html5lib-tests.json
  36                 $json = json_decode( file_get_contents(
  37                         __DIR__ . '/html5lib-tests.json'
  38                 ), true );
  39                 // Munge this slightly into the format phpunit expects
  40                 // for providers, and filter out HTML constructs which
  41                 // the balancer doesn't support.
  42                 $tests = [];
  43                 $okre = "~ \A
  44                         (?i:<!DOCTYPE\ html>)?
  45                         <html><head></head><body>
  46                         .*
  47                         </body></html>
  48                 \z ~xs";
  49                 foreach ( $json as $filename => $cases ) {
  50                         foreach ( $cases as $case ) {
  51                                 $html = $case['document']['html'];
  52                                 if ( !preg_match( $okre, $html ) ) {
  53                                         // Skip tests which involve stuff in the <head> or
  54                                         // weird doctypes.
  55                                         continue;
  56                                 }
  57                                 // We used to do this:
  58                                 //   $html = substr( $html, strlen( $start ), -strlen( $end ) );
  59                                 // But now we use a different field in the test case,
  60                                 // which reports how domino would parse this case in a
  61                                 // no-quirks <body> context.  (The original test case may
  62                                 // have had a different context, or relied on quirks mode.)
  63                                 $html = $case['document']['noQuirksBodyHtml'];
  64                                 // Normalize case of SVG attributes.
  65                                 $html = str_replace( 'foreignObject', 'foreignobject', $html );
  66                                 // Normalize case of MathML attributes.
  67                                 $html = str_replace( 'definitionURL', 'definitionurl', $html );
  68
  69                                 if (
  70                                         isset( $case['document']['props']['comment'] ) &&
  71                                         preg_match( ',<!--[^>]*<,', $html )
  72                                 ) {
  73                                         // Skip tests which include HTML comments containing
  74                                         // the < character, which we don't support.
  75                                         continue;
  76                                 }
  77                                 if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
  78                                         // Skip tests involving <![CDATA[ ]]> quoting.
  79                                         continue;
  80                                 }
  81                                 if (
  82                                         stripos( $case['data'], '<!DOCTYPE' ) !== false &&
  83                                         stripos( $case['data'], '<!DOCTYPE html>' ) === false
  84                                 ) {
  85                                         // Skip tests involving unusual doctypes.
  86                                         continue;
  87                                 }
  88                                 $literalre = "~ <rdar: | < /? (
  89                                         html | head | body | frame | frameset | plaintext
  90                                 ) > ~xi";
  91                                 if ( preg_match( $literalre, $case['data'] ) ) {
  92                                         // Skip tests involving some literal tags, which are
  93                                         // unsupported but don't show up in the expected output.
  94                                         continue;
  95                                 }
  96                                 if (
  97                                         isset( $case['document']['props']['tags']['iframe'] ) ||
  98                                         isset( $case['document']['props']['tags']['noembed'] ) ||
  99                                         isset( $case['document']['props']['tags']['noscript'] ) ||
 100                                         isset( $case['document']['props']['tags']['script'] ) ||
 101                                         isset( $case['document']['props']['tags']['svg script'] ) ||
 102                                         isset( $case['document']['props']['tags']['svg title'] ) ||
 103                                         isset( $case['document']['props']['tags']['title'] ) ||
 104                                         isset( $case['document']['props']['tags']['xmp'] )
 105                                 ) {
 106                                         // Skip tests with unsupported tags which *do* show
 107                                         // up in the expected output.
 108                                         continue;
 109                                 }
 110                                 if (
 111                                         $filename === 'entities01.dat' ||
 112                                         $filename === 'entities02.dat' ||
 113                                         preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
 114                                         preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $case['data'] )
 115                                 ) {
 116                                         // Skip tests involving entity encoding.
 117                                         continue;
 118                                 }
 119                                 if (
 120                                         isset( $case['document']['props']['tagWithLt'] ) ||
 121                                         isset( $case['document']['props']['attrWithFunnyChar'] ) ||
 122                                         preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
 123                                         preg_match( ':</p<p>:', $case['data'] ) ||
 124                                         preg_match( ':<b &=&amp>|<p/x/y/z>:', $case['data'] )
 125                                 ) {
 126                                         // Skip tests with funny tag or attribute names,
 127                                         // which are really tests of the HTML tokenizer, not
 128                                         // the tree builder.
 129                                         continue;
 130                                 }
 131                                 if (
 132                                         preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
 133                                 ) {
 134                                         // The Sanitizer normalizes whitespace in attribute
 135                                         // values, which makes this test case invalid.
 136                                         continue;
 137                                 }
 138                                 if ( $filename === 'plain-text-unsafe.dat' ) {
 139                                         // Skip tests with ASCII null, etc.
 140                                         continue;
 141                                 }
 142                                 $data = preg_replace(
 143                                         '~<!DOCTYPE html>~i', '', $case['data']
 144                                 );
 145                                 $tests[] = [
 146                                         $filename, # use better description?
 147                                         $data,
 148                                         $html,
 149                                         false # strict HTML5 compat mode, no tidy
 150                                 ];
 151                         }
 152                 }
 153
 154                 # Some additional tests for mediawiki-specific features
 155                 $tests[] = [
 156                         'Round-trip serialization for <pre>/<listing>/<textarea>',
 157                         "<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
 158                         "<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
 159                         true # use the tidy-compatible mode
 160                 ];
 161
 162                 return $tests;
 163         }
 164 }