Merge "EditPage: Drop 80% width restriction on wpSummary for OOUI form"
[lhc/web/wiklou.git] / tests / phpunit / includes / SanitizerTest.php
1 <?php
2
3 /**
4 * @todo Tests covering decodeCharReferences can be refactored into a single
5 * method and dataprovider.
6 */
7 class SanitizerTest extends MediaWikiTestCase {
8
/**
 * Drop the MWTidy singleton (tests in this class replace it via
 * MWTidy::setInstance()) before handing over to the parent teardown.
 */
protected function tearDown() {
	MWTidy::destroySingleton();

	parent::tearDown();
}
13
/**
 * A named entity is expected to come back as its UTF-8 byte sequence.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNamedEntities() {
	$encoded = '&eacute;cole';
	$decoded = "\xc3\xa9cole";

	$this->assertEquals(
		$decoded,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode named entities'
	);
}
24
/**
 * Hexadecimal and decimal numeric references are both expected to be
 * decoded to UTF-8.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNumericEntities() {
	$encoded = "&#x108;io bonas dans l'&#233;cole!";
	$decoded = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$this->assertEquals(
		$decoded,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode numeric entities'
	);
}
35
/**
 * A string mixing a numeric reference and a named entity is expected to
 * have both decoded.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole!";
	$decoded = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	$this->assertEquals(
		$decoded,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed numeric/named entities'
	);
}
46
/**
 * Decoding is expected to happen exactly once: an escaped ampersand
 * (&amp;amp; or &amp;#38;) in front of entity-like text yields the literal
 * entity text, not the character that entity would stand for.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedComplexEntities() {
	$encoded =
		"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)";
	$decoded =
		"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)";

	$this->assertEquals(
		$decoded,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed complex entities'
	);
}
59
/**
 * A bare ampersand that does not start a reference is expected to pass
 * through unchanged.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidAmpersand() {
	$text = 'a & b';

	$this->assertEquals(
		'a & b',
		Sanitizer::decodeCharReferences( $text ),
		'Invalid ampersand'
	);
}
70
/**
 * An unknown named entity is expected to be left exactly as written.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidEntities() {
	$text = '&foo;';

	$this->assertEquals(
		'&foo;',
		Sanitizer::decodeCharReferences( $text ),
		'Invalid named entity'
	);
}
81
/**
 * An out-of-range numeric reference is expected to decode to the
 * UtfNormal replacement constant.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidNumberedEntities() {
	$outOfRange = "&#88888888888888;";

	$this->assertEquals(
		UtfNormal\Constants::UTF8_REPLACEMENT,
		Sanitizer::decodeCharReferences( $outOfRange ),
		'Invalid numbered entity'
	);
}
92
/**
 * Feed a lone opening tag to removeHTMLtags() with Tidy disabled and check
 * that it is either escaped or kept (and closed, followed by a newline).
 *
 * @covers Sanitizer::removeHTMLtags
 * @dataProvider provideHtml5Tags
 *
 * @param string $tag Name of an HTML5 element (ie: 'video')
 * @param bool $escaped Whether sanitizer let the tag in or escape it (ie: '&lt;video&gt;')
 */
public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
	MWTidy::setInstance( false );

	// Escaped tags come back entity-encoded; accepted tags come back
	// with a matching closing tag and a trailing newline.
	$expected = $escaped
		? "&lt;$tag&gt;"
		: "<$tag></$tag>\n";

	$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
}
113
/**
 * Provide HTML5 tags for testRemovehtmltagsOnHtml5Tags.
 *
 * @return array[] Each entry is [ <tag name>, <true if the tag must be escaped> ]
 */
public static function provideHtml5Tags() {
	// tag name => whether we want the tag to be escaped (true)
	// or kept as-is (false)
	$mustBeEscaped = [
		'data' => false,
		'mark' => false,
		'time' => false,
		'video' => true,
	];

	$cases = [];
	foreach ( $mustBeEscaped as $tagName => $escaped ) {
		$cases[] = [ $tagName, $escaped ];
	}

	return $cases;
}
127
/**
 * Data sets for testRemoveHTMLtags.
 *
 * Declared public static like the other providers in this class, so
 * PHPUnit can call it without instantiating the test case.
 *
 * @return array[] Each entry is [ <input HTML>, <expected output>, <message> ]
 */
public static function dataRemoveHTMLtags() {
	return [
		// former testSelfClosingTag
		[
			'<div>Hello world</div />',
			'<div>Hello world</div>',
			'Self-closing closing div'
		],
		// Make sure special nested HTML5 semantics are not broken
		// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
		[
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
			'Nested <kbd>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
		[
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
			'Nested <var>.'
		],
		// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
		[
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
			'<abbr> inside <dfn>',
		],
	];
}
157
/**
 * Run removeHTMLtags() with Tidy disabled and compare against the
 * expected markup from the provider.
 *
 * @dataProvider dataRemoveHTMLtags
 * @covers Sanitizer::removeHTMLtags
 *
 * @param string $input HTML handed to the sanitizer
 * @param string $output Expected sanitized HTML
 * @param string|null $msg Assertion message
 */
public function testRemoveHTMLtags( $input, $output, $msg = null ) {
	MWTidy::setInstance( false );

	$sanitized = Sanitizer::removeHTMLtags( $input );

	$this->assertEquals( $output, $sanitized, $msg );
}
166
/**
 * Parse a raw attribute string and compare the resulting name => value map.
 *
 * @dataProvider provideTagAttributesToDecode
 * @covers Sanitizer::decodeTagAttributes
 *
 * @param array $expected Expected attribute map
 * @param string $attributes Raw attribute text
 * @param string $message Assertion message
 */
public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
	$decoded = Sanitizer::decodeTagAttributes( $attributes );

	$this->assertEquals( $expected, $decoded, $message );
}
177
/**
 * Data sets for testDecodeTagAttributes.
 *
 * @return array[] Each entry is
 *   [ <expected attribute map>, <raw attribute string>, <message> ]
 */
public static function provideTagAttributesToDecode() {
	return [
		[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
		[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
		[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
		[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
		// "n" followed by U+0301 COMBINING ACUTE ACCENT, spelled out as
		// bytes so that Unicode normalisation in an editor or transport
		// cannot silently turn it into the precomposed letter.
		[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
		[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
		[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
		[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
		[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
		[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
		[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

		# Invalid beginning
		[ [], '-foo=bar', 'Leading - is forbidden' ],
		[ [], '.foo=bar', 'Leading . is forbidden' ],

		# The same characters are accepted after the first position
		[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed as last character' ],
		[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
		[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
		[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

		# This bit is more relaxed than XML rules, but some extensions use
		# it, like ProofreadPage (see T29539)
		[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
		[ [], 'foo$=baz', 'Symbols are not allowed' ],
		[ [], 'foo@=baz', 'Symbols are not allowed' ],
		[ [], 'foo~=baz', 'Symbols are not allowed' ],
		[
			[ 'foo' => '1[#^`*%w/(' ],
			'foo=1[#^`*%w/(',
			'All kind of characters are allowed as values'
		],
		// The value contains a single quote and is wrapped in double quotes
		[
			[ 'foo' => '1[#^`*%\'w/(' ],
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		],
		// The value contains a double quote and is wrapped in single quotes
		[
			[ 'foo' => '1[#^`*%"w/(' ],
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		],
		[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
		[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
	];
}
242
/**
 * fixTagAttributes() is expected to return the attribute unchanged,
 * prefixed with a single space.
 *
 * @dataProvider provideDeprecatedAttributes
 * @covers Sanitizer::fixTagAttributes
 *
 * @param string $inputAttr Attribute text, e.g. 'clear="left"'
 * @param string $inputEl Element name the attribute belongs to
 * @param string $message Assertion message
 */
public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
	$expected = ' ' . $inputAttr;
	$fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );

	$this->assertEquals( $expected, $fixed, $message );
}
253
/**
 * Data sets for testDeprecatedAttributesUnaltered.
 *
 * @return array[] Each entry is [ <attribute>, <element> ]
 */
public static function provideDeprecatedAttributes() {
	// element => attributes to try on it; iteration order is the order
	// in which the data sets are emitted.
	$attributesByElement = [
		'br' => [ 'clear="left"', 'clear="all"' ],
		'td' => [ 'width="100"', 'nowrap="true"', 'nowrap=""', 'align="right"' ],
		'table' => [ 'align="center"' ],
		'tr' => [ 'align="left"' ],
		'div' => [ 'align="center"' ],
		'h1' => [ 'align="left"' ],
		'p' => [ 'align="left"' ],
	];

	$cases = [];
	foreach ( $attributesByElement as $element => $attributes ) {
		foreach ( $attributes as $attribute ) {
			$cases[] = [ $attribute, $element ];
		}
	}

	return $cases;
}
270
/**
 * Run a CSS snippet through checkCss() and compare the result.
 *
 * @dataProvider provideCssCommentsFixtures
 * @covers Sanitizer::checkCss
 *
 * @param string $expected Expected checkCss() result
 * @param string $css CSS handed to checkCss()
 * @param string $message Assertion message
 */
public function testCssCommentsChecking( $expected, $css, $message = '' ) {
	$checked = Sanitizer::checkCss( $css );

	$this->assertEquals( $expected, $checked, $message );
}
281
/**
 * Data sets for testCssCommentsChecking.
 *
 * @return array[] Each entry is [ <expected>, <css>, [message] ]
 */
public static function provideCssCommentsFixtures() {
	// Marker expected in place of any input that is rejected
	$rejected = '/* insecure input */';

	return [
		// Valid comments spanning entire input
		[ '/**/', '/**/' ],
		[ '/* comment */', '/* comment */' ],

		// Weird stuff
		[ ' ', '/****/' ],
		[ ' ', '/* /* */' ],
		[ 'display: block;', "display:/* foo */block;" ],
		[
			'display: block;',
			"display:\\2f\\2a foo \\2a\\2f block;",
			'Backslash-escaped comments must be stripped (T30450)'
		],
		[
			'',
			'/* unfinished comment structure',
			'Remove anything after a comment-start token'
		],
		[
			'',
			"\\2f\\2a unifinished comment'",
			'Remove anything after a backslash-escaped comment-start token'
		],

		// Constructs expected to be rejected outright
		[
			$rejected,
			'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\');'
		],
		[
			$rejected,
			'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
		],
		[ $rejected, 'width: expression(1+1);' ],
		[ $rejected, 'background-image: image(asdf.png);' ],
		[ $rejected, 'background-image: -webkit-image(asdf.png);' ],
		[ $rejected, 'background-image: -moz-image(asdf.png);' ],
		[ $rejected, 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[
			$rejected,
			'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
		],
		[
			$rejected,
			'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
		],
		[ $rejected, 'foo: attr( title, url );' ],
		[ $rejected, 'foo: attr( title url );' ],
	];
}
325
/**
 * Compare escapeHtmlAllowEntities() output with the expected escaped text.
 *
 * @dataProvider provideEscapeHtmlAllowEntities
 * @covers Sanitizer::escapeHtmlAllowEntities
 *
 * @param string $expected Expected escaped output
 * @param string $html Input text
 */
public function testEscapeHtmlAllowEntities( $expected, $html ) {
	$escaped = Sanitizer::escapeHtmlAllowEntities( $html );

	$this->assertEquals( $expected, $escaped );
}
336
/**
 * Data sets for testEscapeHtmlAllowEntities.
 *
 * @return array[] Each entry is [ <expected output>, <input> ]
 */
public static function provideEscapeHtmlAllowEntities() {
	$cases = [];

	// Plain text
	$cases[] = [ 'foo', 'foo' ];
	// Numeric character reference in the input
	$cases[] = [ 'a¡b', 'a&#161;b' ];
	// Literal single quote in the input
	$cases[] = [ 'foo&#039;bar', "foo'bar" ];
	// Markup in the input
	$cases[] = [ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ];

	return $cases;
}
345
/**
 * Test Sanitizer::escapeId with the 'noninitial' and 'legacy' options.
 *
 * @dataProvider provideEscapeId
 * @covers Sanitizer::escapeId
 *
 * @param string $input Raw id
 * @param string $output Expected escaped id
 */
public function testEscapeId( $input, $output ) {
	$options = [ 'noninitial', 'legacy' ];
	$escaped = Sanitizer::escapeId( $input, $options );

	$this->assertEquals( $output, $escaped );
}
358
/**
 * Data sets for testEscapeId.
 *
 * @return array[] Each entry is [ <raw id>, <expected escaped id> ]
 */
public static function provideEscapeId() {
	// raw id => expected escaped id
	$escapedByInput = [
		'+' => '.2B',
		'&' => '.26',
		'=' => '.3D',
		':' => ':',
		';' => '.3B',
		'@' => '.40',
		'$' => '.24',
		'-_.' => '-_.',
		'!' => '.21',
		'*' => '.2A',
		'/' => '.2F',
		'[]' => '.5B.5D',
		'<>' => '.3C.3E',
		'\'' => '.27',
		'§' => '.C2.A7',
		'Test:A & B/Here' => 'Test:A_.26_B.2FHere',
		'A&B&amp;C&amp;amp;D&amp;amp;amp;E' => 'A.26B.26C.26amp.3BD.26amp.3Bamp.3BE',
	];

	$cases = [];
	foreach ( $escapedByInput as $rawId => $escapedId ) {
		$cases[] = [ $rawId, $escapedId ];
	}

	return $cases;
}
380
/**
 * Test escapeIdReferenceList for consistency with escapeId
 *
 * The reference value is built by escaping each id on its own with
 * escapeId() and joining the results with a single space; the list
 * escaper must produce the same string.
 *
 * @dataProvider provideEscapeIdReferenceList
 * @covers Sanitizer::escapeIdReferenceList
 *
 * @param string $referenceList Space-separated list of two ids
 * @param string $id1 First id of the list
 * @param string $id2 Second id of the list
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	$expected = Sanitizer::escapeId( $id1, 'noninitial' )
		. ' '
		. Sanitizer::escapeId( $id2, 'noninitial' );

	// Expected value first, value under test second, so that PHPUnit's
	// failure output labels the two sides correctly.
	$this->assertEquals(
		$expected,
		Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' )
	);
}
395
/**
 * Data sets for testEscapeIdReferenceList.
 *
 * @return array[] Each entry is
 *   [ <reference list>, <individual id 1>, <individual id 2> ]
 */
public static function provideEscapeIdReferenceList() {
	$idPairs = [
		[ 'foo', 'bar' ],
		[ '#1', '#2' ],
		[ '+1', '+2' ],
	];

	// The reference list is simply the two ids joined by one space.
	return array_map(
		function ( array $pair ) {
			return [ implode( ' ', $pair ), $pair[0], $pair[1] ];
		},
		$idPairs
	);
}
404
/**
 * Check which attribute names are reported as reserved data attributes.
 *
 * @dataProvider provideIsReservedDataAttribute
 * @covers Sanitizer::isReservedDataAttribute
 *
 * @param string $attr Attribute name
 * @param bool $expected Whether the name is expected to be reserved
 */
public function testIsReservedDataAttribute( $attr, $expected ) {
	$this->assertSame( $expected, Sanitizer::isReservedDataAttribute( $attr ) );
}
411
/**
 * Data sets for testIsReservedDataAttribute.
 *
 * @return array[] Each entry is [ <attribute name>, <expected result> ]
 */
public static function provideIsReservedDataAttribute() {
	$notReserved = [
		'foo',
		'data',
		'data-foo',
	];
	$reserved = [
		'data-mw',
		'data-ooui',
		'data-parsoid',
		'data-mw-foo',
		'data-ooui-foo',
		// could be false but this is how it's implemented currently
		'data-mwfoo',
	];

	$cases = [];
	foreach ( $notReserved as $attr ) {
		$cases[] = [ $attr, false ];
	}
	foreach ( $reserved as $attr ) {
		$cases[] = [ $attr, true ];
	}

	return $cases;
}
425 }