Merge "Avoid master queries when old filter checks are viewed"
[lhc/web/wiklou.git] / tests / phpunit / includes / tidy / BalancerTest.php
1 <?php
2
/**
 * Tests for MediaWiki\Tidy\Balancer.
 *
 * Most cases come from html5lib-tests.json (stored next to this file), after
 * filtering out HTML constructs which the balancer does not support. A few
 * MediaWiki-specific cases are appended by hand.
 */
class BalancerTest extends MediaWikiTestCase {

	/**
	 * Anything that needs to happen before your tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setup and teardown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
	}

	/**
	 * Balance $input and compare the serialized result against $expected.
	 *
	 * @covers MediaWiki\Tidy\Balancer::balance
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Label shown when the assertion fails
	 * @param string $input HTML handed to the balancer
	 * @param string $expected Expected serialization of the balanced HTML
	 * @param bool $useTidy Value passed as the balancer's 'tidyCompat' option
	 */
	public function testBalancer( $description, $input, $expected, $useTidy ) {
		$balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => $useTidy, /* tidy-compatible mode if true */
			'allowComments' => true, /* comment parsing */
		] );
		$output = $balancer->balance( $input );

		// Ignore self-closing tags: rewrite "<br />" and "<br/>" as "<br>"
		// so that they compare equal to the expected serialization.
		$output = preg_replace( '/\s*\/>/', '>', $output );

		// Both sides are strings; assertSame avoids PHP's loose comparison
		// of numeric-looking strings, and catches a null result from
		// preg_replace instead of letting it compare equal to ''.
		$this->assertSame( $expected, $output, $description );
	}

	/**
	 * Data provider for testBalancer().
	 *
	 * @return array[] Rows of [ description, input, expected, useTidy ]
	 * @throws RuntimeException If html5lib-tests.json cannot be read or decoded
	 */
	public static function provideBalancerTests() {
		// Munge the html5lib tests slightly into the format phpunit expects
		// for providers, and filter out HTML constructs which
		// the balancer doesn't support.
		$tests = [];
		foreach ( self::loadHtml5libCases() as $filename => $cases ) {
			foreach ( $cases as $case ) {
				if ( !self::isSupportedCase( $filename, $case ) ) {
					continue;
				}
				// We used to strip the <html><head></head><body> wrapper
				// from $case['document']['html'] here.
				// But now we use a different field in the test case,
				// which reports how domino would parse this case in a
				// no-quirks <body> context. (The original test case may
				// have had a different context, or relied on quirks mode.)
				$html = $case['document']['noQuirksBodyHtml'];
				// Normalize case of the SVG <foreignObject> element name.
				$html = str_replace( 'foreignObject', 'foreignobject', $html );
				// Normalize case of the MathML definitionURL attribute.
				$html = str_replace( 'definitionURL', 'definitionurl', $html );

				$data = preg_replace(
					'~<!DOCTYPE html>~i', '', $case['data']
				);
				$tests[] = [
					$filename, # use better description?
					$data,
					$html,
					false # strict HTML5 compat mode, no tidy
				];
			}
		}

		# Some additional tests for mediawiki-specific features
		$tests[] = [
			'Round-trip serialization for <pre>/<listing>/<textarea>',
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			true # use the tidy-compatible mode
		];

		return $tests;
	}

	/**
	 * Read and decode html5lib-tests.json from this directory.
	 *
	 * Fails loudly rather than letting the provider silently shrink to the
	 * hand-written cases when the fixture is missing or corrupt.
	 *
	 * @return array Decoded JSON: filename => list of test cases
	 * @throws RuntimeException If the file cannot be read or is not a JSON
	 *   object/array
	 */
	private static function loadHtml5libCases() {
		$path = __DIR__ . '/html5lib-tests.json';
		$contents = file_get_contents( $path );
		if ( $contents === false ) {
			throw new RuntimeException( "Unable to read $path" );
		}
		$json = json_decode( $contents, true );
		if ( !is_array( $json ) ) {
			throw new RuntimeException( "Unable to decode $path as JSON" );
		}
		return $json;
	}

	/**
	 * Decide whether an html5lib test case can be run against the balancer.
	 *
	 * @param string $filename Name of the html5lib .dat file the case is from
	 * @param array $case Decoded test case; reads 'data' and 'document'
	 * @return bool False if the case uses a construct we don't support
	 */
	private static function isSupportedCase( $filename, $case ) {
		$okre = "~ \A
			(?i:<!DOCTYPE\ html>)?
			<html><head></head><body>
			.*
			</body></html>
		\z ~xs";
		if ( !preg_match( $okre, $case['document']['html'] ) ) {
			// Skip tests which involve stuff in the <head> or
			// weird doctypes.
			return false;
		}

		$data = $case['data'];

		if (
			isset( $case['document']['props']['comment'] ) &&
			preg_match( ',<!--[^>]*<,', $case['document']['noQuirksBodyHtml'] )
		) {
			// Skip tests which include HTML comments containing
			// the < character, which we don't support.
			return false;
		}
		if ( strpos( $data, '<![CDATA[' ) !== false ) {
			// Skip tests involving <![CDATA[ ]]> quoting.
			return false;
		}
		if (
			stripos( $data, '<!DOCTYPE' ) !== false &&
			stripos( $data, '<!DOCTYPE html>' ) === false
		) {
			// Skip tests involving unusual doctypes.
			return false;
		}

		$literalre = "~ <rdar: | < /? (
			html | head | body | frame | frameset | plaintext
		) > ~xi";
		if ( preg_match( $literalre, $data ) ) {
			// Skip tests involving some literal tags, which are
			// unsupported but don't show up in the expected output.
			return false;
		}

		// Unsupported tags which *do* show up in the expected output.
		$unsupportedTags = [
			'iframe',
			'noembed',
			'noscript',
			'script',
			'svg script',
			'svg title',
			'title',
			'xmp',
		];
		foreach ( $unsupportedTags as $tag ) {
			if ( isset( $case['document']['props']['tags'][$tag] ) ) {
				return false;
			}
		}

		if (
			$filename === 'entities01.dat' ||
			$filename === 'entities02.dat' ||
			preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $data ) ||
			preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $data )
		) {
			// Skip tests involving entity encoding.
			return false;
		}
		if (
			isset( $case['document']['props']['tagWithLt'] ) ||
			isset( $case['document']['props']['attrWithFunnyChar'] ) ||
			preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $data ) ||
			preg_match( ':</p<p>:', $data ) ||
			preg_match( ':<b &=&amp>|<p/x/y/z>:', $data )
		) {
			// Skip tests with funny tag or attribute names,
			// which are really tests of the HTML tokenizer, not
			// the tree builder.
			return false;
		}
		if ( preg_match( ':encoding=" text/html "|type=" hidden":', $data ) ) {
			// The Sanitizer normalizes whitespace in attribute
			// values, which makes this test case invalid.
			return false;
		}
		if ( $filename === 'plain-text-unsafe.dat' ) {
			// Skip tests with ASCII null, etc.
			return false;
		}

		return true;
	}
}