balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => false, /* standard parser */
		] );
	}

	/**
	 * Any cleanup you need to do should go here.
	 *
	 * Currently this only defers to the parent implementation.
	 */
	protected function tearDown() {
		parent::tearDown();
	}

	/**
	 * Run one test case through the balancer and compare the result with the
	 * expected serialization.
	 *
	 * @covers MediaWiki\Tidy\Balancer::balance
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Used as the assertion failure message; the
	 *   provider passes the fixture file name here
	 * @param string $input Markup handed to balance()
	 * @param string $expected Output the balancer is expected to produce
	 */
	public function testBalancer( $description, $input, $expected ) {
		$output = $this->balancer->balance( $input );

		// Ignore self-closing tags
		$output = preg_replace( '/\s*\/>/', '>', $output );

		$this->assertEquals( $expected, $output, $description );
	}

	/**
	 * Build the data sets for testBalancer() from html5lib-tests.json.
	 *
	 * Each data set is [ fixture file name, $case['data'], expected HTML ],
	 * where the expected HTML is the case's 'noQuirksBodyHtml' field with
	 * 'foreignObject' lower-cased.
	 *
	 * NOTE(review): several spans of this method appear to have lost text
	 * (see the inline notes). They are reproduced as found rather than
	 * guessed at; restore them from version control before relying on this
	 * provider.
	 *
	 * @return array[]
	 */
	public static function provideBalancerTests() {
		// Get the tests from html5lib-tests.json
		$json = json_decode( file_get_contents(
			__DIR__ . '/html5lib-tests.json'
		), true );
		// Munge this slightly into the format phpunit expects
		// for providers, and filter out HTML constructs which
		// the balancer doesn't support.
		$tests = [];
		// NOTE(review): both markers are empty here. With $end === '',
		// substr( $html, -strlen( $end ) ) returns all of $html, so every
		// non-empty document fails the check below and is skipped.
		// Presumably these held wrapper markup that was lost - TODO confirm.
		$start = '';
		$end = '';
		foreach ( $json as $filename => $cases ) {
			foreach ( $cases as $case ) {
				$html = $case['document']['html'];
				if (
					substr( $html, 0, strlen( $start ) ) !== $start ||
					substr( $html, -strlen( $end ) ) !== $end
				) {
					// Skip tests which involve stuff in the or
					// weird doctypes.
					continue;
				}
				// We used to do this:
				// $html = substr( $html, strlen( $start ), -strlen( $end ) );
				// But now we use a different field in the test case,
				// which reports how domino would parse this case in a
				// no-quirks context. (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Normalize case of SVG attributes.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );

				if ( isset( $case['document']['props']['comment'] ) ) {
					// Skip tests which include HTML comments, which
					// the balancer requires to have been stripped.
					continue;
				}
				// NOTE(review): the next two conditions are damaged. The
				// string literal opened after strpos() is never terminated
				// and the stripos()/preg_match() arguments are incomplete,
				// so this does not parse. Left exactly as found.
				if ( strpos( $case['data'], ' quoting.
					continue;
				}
				if ( stripos( $case['data'], '|)$:', $case['data'] ) || preg_match( '::', $case['data'] ) ) {
					// Skip tests with funny tag or attribute names,
					// which are really tests of the HTML tokenizer, not
					// the tree builder.
					continue;
				}
				if ( stripos( $case['data'], 'encoding=" text/html "' ) !== false ) {
					// The Sanitizer normalizes whitespace in attribute
					// values, which makes this test case invalid.
					continue;
				}
				if ( $filename === 'plain-text-unsafe.dat' ) {
					// Skip tests with ASCII null, etc.
					continue;
				}
				$tests[] = [
					$filename, # use better description?
					$case['data'],
					$html
				];
			}
		}
		return $tests;
	}
}