3 use Wikimedia\TestingAccessWrapper
;
6 * @todo Tests covering decodeCharReferences can be refactored into a single
7 * method and dataprovider.
11 class SanitizerTest
extends MediaWikiTestCase
{
13 protected function tearDown() {
// Per-test cleanup: calls MWTidy::destroySingleton(). Several tests in this class call
// MWTidy::setInstance( false ), so this presumably restores a clean state between tests.
// NOTE(review): the remainder of the body and the closing brace are not visible in this
// excerpt - confirm against the full file.
14 MWTidy
::destroySingleton();
19 * @covers Sanitizer::decodeCharReferences
21 public function testDecodeNamedEntities() {
// Passes a string literal to Sanitizer::decodeCharReferences(); 'decode named entities' is
// presumably the assertion failure message.
// NOTE(review): the assertion call and the expected value are missing from this excerpt, and
// the input literal appears already decoded here - presumably the real file holds a named
// entity in its place; confirm.
24 Sanitizer
::decodeCharReferences( 'école' ),
25 'decode named entities'
30 * @covers Sanitizer::decodeCharReferences
32 public function testDecodeNumericEntities() {
// The first visible argument is a UTF-8 byte string (\xc4\x88 encodes U+0108, \xc3\xa9
// encodes U+00E9); the second is the result of Sanitizer::decodeCharReferences().
// NOTE(review): the enclosing assertion call is not visible; presumably an equality check
// with the byte string as the expected value. The input literal appears already decoded in
// this excerpt - the real file presumably uses numeric character references; confirm.
34 "\xc4\x88io bonas dans l'\xc3\xa9cole!",
35 Sanitizer
::decodeCharReferences( "Ĉio bonas dans l'école!" ),
36 'decode numeric entities'
41 * @covers Sanitizer::decodeCharReferences
43 public function testDecodeMixedEntities() {
// Same visible shape as testDecodeNumericEntities: a UTF-8 byte string followed by the
// result of Sanitizer::decodeCharReferences(), with a message naming mixed numeric/named
// entities.
// NOTE(review): the enclosing assertion call is not visible, and the input literal appears
// already decoded in this excerpt - confirm the real input against the full file.
45 "\xc4\x88io bonas dans l'\xc3\xa9cole!",
46 Sanitizer
::decodeCharReferences( "Ĉio bonas dans l'école!" ),
47 'decode mixed numeric/named entities'
52 * @covers Sanitizer::decodeCharReferences
54 public function testDecodeMixedComplexEntities() {
// A UTF-8 byte string (first visible argument) is paired with the result of
// Sanitizer::decodeCharReferences() on a longer sentence containing a parenthesised part.
// NOTE(review): presumably the parenthesised part holds escaped references that must survive
// as literal text after decoding; both literals look already decoded in this excerpt, and
// the enclosing assertion call and closing parentheses are not visible - confirm.
56 "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas Ĉio dans l'école)",
57 Sanitizer
::decodeCharReferences(
58 "Ĉio bonas dans l'école! (mais pas Ĉio dans l'école)"
60 'decode mixed complex entities'
65 * @covers Sanitizer::decodeCharReferences
67 public function testInvalidAmpersand() {
// Calls Sanitizer::decodeCharReferences() on 'a & b', i.e. an ampersand that does not start
// a character reference.
// NOTE(review): the assertion call, expected value and message are not visible in this
// excerpt; presumably the input is expected to come back unchanged - confirm.
70 Sanitizer
::decodeCharReferences( 'a & b' ),
76 * @covers Sanitizer::decodeCharReferences
78 public function testInvalidEntities() {
// Calls Sanitizer::decodeCharReferences() on '&foo;', which has entity syntax but, per the
// message below, is treated as an invalid named entity.
// NOTE(review): the assertion call and expected value are not visible in this excerpt;
// presumably the input is expected to come back unchanged - confirm.
81 Sanitizer
::decodeCharReferences( '&foo;' ),
82 'Invalid named entity'
87 * @covers Sanitizer::decodeCharReferences
89 public function testInvalidNumberedEntities() {
// Pairs UtfNormal\Constants::UTF8_REPLACEMENT with the result of
// Sanitizer::decodeCharReferences(); presumably an invalid numeric reference is expected to
// decode to the replacement character.
// NOTE(review): the enclosing assertion call is not visible, and the input literal shows as
// a single replacement character in this excerpt - the real file presumably contains a
// numeric character reference there; confirm.
91 UtfNormal\Constants
::UTF8_REPLACEMENT
,
92 Sanitizer
::decodeCharReferences( "�" ),
93 'Invalid numbered entity'
98 * @covers Sanitizer::removeHTMLtags
99 * @dataProvider provideHtml5Tags
101 * @param string $tag Name of an HTML5 element (ie: 'video')
102 * @param bool $escaped Whether sanitizer let the tag in or escape it (ie: '<video>')
104 public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
// Suppresses the 'disabling tidy' and 'MWTidy::setInstance' deprecation notices, calls
// MWTidy::setInstance( false ), then checks Sanitizer::removeHTMLtags() on an opening $tag.
// Two assertEquals calls are visible with different expected strings.
// NOTE(review): the branching between them is not visible in this excerpt; presumably one
// runs when $escaped is true and the other when it is false. The expected literal of the
// first assertion looks entity-decoded here - confirm against the full file.
105 $this->hideDeprecated( 'disabling tidy' );
106 $this->hideDeprecated( 'MWTidy::setInstance' );
107 MWTidy
::setInstance( false );
110 $this->assertEquals( "<$tag>",
111 Sanitizer
::removeHTMLtags( "<$tag>" )
114 $this->assertEquals( "<$tag></$tag>\n",
115 Sanitizer
::removeHTMLtags( "<$tag>" )
123 public static function provideHtml5Tags() {
// Data provider for testRemovehtmltagsOnHtml5Tags (referenced by its @dataProvider
// annotation). Each row is [ tag name, flag ], where the flag is one of the two named
// booleans below.
// NOTE(review): the return statement wrapping these rows is not visible in this excerpt.
124 $ESCAPED = true; # We want the tag to be escaped
125 $VERBATIM = false; # We want to keep the tag
127 [ 'data', $VERBATIM ],
128 [ 'mark', $VERBATIM ],
129 [ 'time', $VERBATIM ],
130 [ 'video', $ESCAPED ],
134 function dataRemoveHTMLtags() {
// Data provider for testRemoveHTMLtags (referenced by its @dataProvider annotation), whose
// parameters are ( $input, $output, $msg = null ); rows therefore read
// [ input HTML, expected output, optional message ].
// NOTE(review): declared without a visibility keyword or static, unlike e.g.
// provideHtml5Tags. The row brackets, return statement and some messages are not visible
// in this excerpt.
136 // former testSelfClosingTag
138 '<div>Hello world</div />',
139 '<div>Hello world</div>',
140 'Self-closing closing div'
142 // Make sure special nested HTML5 semantics are not broken
143 // https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
145 '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
146 '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
149 // https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
151 '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
152 '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
155 // https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
157 '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
158 '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
159 '<abbr> inside <dfn>',
165 * @dataProvider dataRemoveHTMLtags
166 * @covers Sanitizer::removeHTMLtags
168 public function testRemoveHTMLtags( $input, $output, $msg = null ) {
// Suppresses the 'disabling tidy' and 'MWTidy::setInstance' deprecation notices, calls
// MWTidy::setInstance( false ), then asserts that Sanitizer::removeHTMLtags( $input )
// equals $output, using $msg as the failure message.
169 $this->hideDeprecated( 'disabling tidy' );
170 $this->hideDeprecated( 'MWTidy::setInstance' );
171 MWTidy
::setInstance( false );
172 $this->assertEquals( $output, Sanitizer
::removeHTMLtags( $input ), $msg );
176 * @dataProvider provideTagAttributesToDecode
177 * @covers Sanitizer::decodeTagAttributes
179 public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
// Asserts that Sanitizer::decodeTagAttributes( $attributes ) equals $expected.
// NOTE(review): the trailing argument of the assertion (presumably $message) and the
// closing lines are not visible in this excerpt.
180 $this->assertEquals( $expected,
181 Sanitizer
::decodeTagAttributes( $attributes ),
186 public static function provideTagAttributesToDecode() {
// Data provider for testDecodeTagAttributes. Rows read
// [ expected attribute array, raw attribute string, message ]; an empty expected array
// means the attribute is rejected.
// NOTE(review): several multi-line rows have lost their brackets, input strings or messages
// in this excerpt (e.g. the three 'foo' + 'baz' rows look identical here but presumably
// differ in whitespace in the real file), and entity-based inputs appear already decoded -
// confirm against the full file before editing any row.
188 [ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
189 [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
190 [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
191 [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
192 [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
193 [ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
194 [ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
195 [ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
197 [ 'foo' => 'bar', 'baz' => 'foo' ],
198 'foo=\'bar\' baz="foo"',
202 [ 'foo' => 'bar', 'baz' => 'foo' ],
203 'foo=\'bar\' baz="foo"',
207 [ 'foo' => 'bar', 'baz' => 'foo' ],
208 'foo=\'bar\' baz="foo"',
211 [ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
212 [ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
213 [ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
214 [ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],
217 [ [], '-foo=bar', 'Leading - is forbidden' ],
218 [ [], '.foo=bar', 'Leading . is forbidden' ],
219 [ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
220 [ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
221 [ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
222 [ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
223 [ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],
225 # This bit is more relaxed than XML rules, but some extensions use
226 # it, like ProofreadPage (see T29539)
227 [ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
228 [ [], 'foo$=baz', 'Symbols are not allowed' ],
229 [ [], 'foo@=baz', 'Symbols are not allowed' ],
230 [ [], 'foo~=baz', 'Symbols are not allowed' ],
232 [ 'foo' => '1[#^`*%w/(' ],
234 'All kind of characters are allowed as values'
237 [ 'foo' => '1[#^`*%\'w/(' ],
238 'foo="1[#^`*%\'w/("',
239 'Double quotes are allowed if quoted by single quotes'
242 [ 'foo' => '1[#^`*%"w/(' ],
243 'foo=\'1[#^`*%"w/(\'',
244 'Single quotes are allowed if quoted by double quotes'
246 [ [ 'foo' => '&"' ], 'foo=&"', 'Special chars can be provided as entities' ],
247 [ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
252 * @dataProvider provideDeprecatedAttributes
253 * @covers Sanitizer::fixTagAttributes
254 * @covers Sanitizer::validateTagAttributes
255 * @covers Sanitizer::validateAttributes
257 public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
// Asserts that Sanitizer::fixTagAttributes( $inputAttr, $inputEl ) returns the attribute
// text unchanged apart from a single leading space (the expected value is " $inputAttr").
// NOTE(review): the trailing assertion argument (presumably $message) and the closing lines
// are not visible in this excerpt.
258 $this->assertEquals( " $inputAttr",
259 Sanitizer
::fixTagAttributes( $inputAttr, $inputEl ),
264 public static function provideDeprecatedAttributes() {
// Data provider for testDeprecatedAttributesUnaltered: each row pairs an attribute string
// with the element name it is applied to; no row here supplies the optional message.
// NOTE(review): the return statement wrapping these rows is not visible in this excerpt.
265 /** [ <attribute>, <element>, [message] ] */
267 [ 'clear="left"', 'br' ],
268 [ 'clear="all"', 'br' ],
269 [ 'width="100"', 'td' ],
270 [ 'nowrap="true"', 'td' ],
271 [ 'nowrap=""', 'td' ],
272 [ 'align="right"', 'td' ],
273 [ 'align="center"', 'table' ],
274 [ 'align="left"', 'tr' ],
275 [ 'align="center"', 'div' ],
276 [ 'align="left"', 'h1' ],
277 [ 'align="left"', 'p' ],
282 * @dataProvider provideValidateTagAttributes
283 * @covers Sanitizer::validateTagAttributes
284 * @covers Sanitizer::validateAttributes
286 public function testValidateTagAttributes( $element, $attribs, $expected ) {
// Runs Sanitizer::validateTagAttributes( $attribs, $element ) - note the argument order is
// attributes first, element second - and compares the result with $expected.
// NOTE(review): the two trailing boolean flags passed to assertArrayEquals presumably select
// an unordered, key-aware comparison; confirm against MediaWikiTestCase.
287 $actual = Sanitizer
::validateTagAttributes( $attribs, $element );
288 $this->assertArrayEquals( $expected, $actual, false, true );
291 public static function provideValidateTagAttributes() {
// Data provider for testValidateTagAttributes( $element, $attribs, $expected ). The visible
// lines are pairs of [ input attributes ] followed by [ expected attributes ]: in the first
// pair 'bogus' is dropped and the id 'foo bar' becomes 'foo_bar'; in the second pair 'id' is
// dropped while 'itemprop' and 'content' are kept.
// NOTE(review): the element names and row brackets are not visible in this excerpt.
294 [ 'id' => 'foo bar', 'bogus' => 'stripped', 'data-foo' => 'bar' ],
295 [ 'id' => 'foo_bar', 'data-foo' => 'bar' ],
298 [ 'id' => 'foo bar', 'itemprop' => 'foo', 'content' => 'bar' ],
299 [ 'itemprop' => 'foo', 'content' => 'bar' ],
305 * @dataProvider provideAttributeWhitelist
306 * @covers Sanitizer::attributeWhitelist
308 public function testAttributeWhitelist( $element, $attribs ) {
// Suppresses the deprecation notices for Sanitizer::attributeWhitelist and
// Sanitizer::setupAttributeWhitelist, then checks that
// Sanitizer::attributeWhitelist( $element ) matches the expected attribute list $attribs.
309 $this->hideDeprecated( 'Sanitizer::attributeWhitelist' );
310 $this->hideDeprecated( 'Sanitizer::setupAttributeWhitelist' );
311 $actual = Sanitizer
::attributeWhitelist( $element );
312 $this->assertArrayEquals( $attribs, $actual );
316 * @dataProvider provideAttributeWhitelist
317 * @covers Sanitizer::attributeWhitelistInternal
319 public function testAttributeWhitelistInternal( $element, $attribs ) {
// Calls attributeWhitelistInternal( $element ) through a TestingAccessWrapper built from the
// Sanitizer class (presumably because the method is not public - confirm), then compares
// the keys of the returned array with the expected attribute list $attribs.
320 $sanitizer = TestingAccessWrapper
::newFromClass( Sanitizer
::class );
321 $actual = $sanitizer->attributeWhitelistInternal( $element );
322 $this->assertArrayEquals( $attribs, array_keys( $actual ) );
325 public function provideAttributeWhitelist() {
// Data provider shared by testAttributeWhitelist and testAttributeWhitelistInternal (both
// name it in their @dataProvider annotations). Each row is
// [ element name, list of attribute names expected for that element ].
// NOTE(review): this provider is an instance method, whereas several other providers in
// this file are static. The return statement is not visible in this excerpt.
326 /** [ <element>, [ <good attribute 1>, <good attribute 2>, ...] ] */
328 [ 'math', [ 'class', 'style', 'id', 'title' ] ],
329 [ 'meta', [ 'itemprop', 'content' ] ],
330 [ 'link', [ 'itemprop', 'href', 'title' ] ],
335 * @dataProvider provideCssCommentsFixtures
336 * @covers Sanitizer::checkCss
338 public function testCssCommentsChecking( $expected, $css, $message = '' ) {
// Asserts that Sanitizer::checkCss( $css ) equals $expected.
// NOTE(review): the trailing assertion argument (presumably $message) and the closing lines
// are not visible in this excerpt.
339 $this->assertEquals( $expected,
340 Sanitizer
::checkCss( $css ),
345 public static function provideCssCommentsFixtures() {
// Data provider for testCssCommentsChecking; rows follow the layout noted on the next line.
// In the visible rows an expected value of '' means everything was removed, and the literal
// comment string 'insecure input' (wrapped in CSS comment markers) is the expected value
// for every input using a filter/expression/image/attr/var construct.
// NOTE(review): several rows and the brackets of the multi-line rows are not visible in
// this excerpt.
346 /** [ <expected>, <css>, [message] ] */
348 // Valid comments spanning entire input
350 [ '/* comment */', '/* comment */' ],
354 [ 'display: block;', "display:/* foo */block;" ],
355 [ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
356 'Backslash-escaped comments must be stripped (T30450)' ],
357 [ '', '/* unfinished comment structure',
358 'Remove anything after a comment-start token' ],
359 [ '', "\\2f\\2a unifinished comment'",
360 'Remove anything after a backslash-escaped comment-start token' ],
362 '/* insecure input */',
363 'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
364 . '(src=\'asdf.png\',sizingMethod=\'scale\');'
367 '/* insecure input */',
368 '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
369 . '(src=\'asdf.png\',sizingMethod=\'scale\')";'
371 [ '/* insecure input */', 'width: expression(1+1);' ],
372 [ '/* insecure input */', 'background-image: image(asdf.png);' ],
373 [ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
374 [ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
375 [ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
377 '/* insecure input */',
378 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
381 '/* insecure input */',
382 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
384 [ '/* insecure input */', 'foo: attr( title, url );' ],
385 [ '/* insecure input */', 'foo: attr( title url );' ],
386 [ '/* insecure input */', 'foo: var(--evil-attribute)' ],
391 * @dataProvider provideEscapeHtmlAllowEntities
392 * @covers Sanitizer::escapeHtmlAllowEntities
394 public function testEscapeHtmlAllowEntities( $expected, $html ) {
// Calls Sanitizer::escapeHtmlAllowEntities( $html ).
// NOTE(review): the assertion call that presumably compares the result with $expected is
// not visible in this excerpt.
397 Sanitizer
::escapeHtmlAllowEntities( $html )
401 public static function provideEscapeHtmlAllowEntities() {
// Data provider for testEscapeHtmlAllowEntities( $expected, $html ); rows read
// [ expected, input ].
// NOTE(review): the literals appear entity-decoded in this excerpt, which makes expected
// and input look identical (and leaves an unescaped quote in the second row) - treat the
// visible values as unreliable and confirm against the full file. Some rows and the return
// statement are not visible.
404 [ 'a¡b', 'a¡b' ],
405 [ 'foo'bar', "foo'bar" ],
406 [ '<script>foo</script>', '<script>foo</script>' ],
411 * Test Sanitizer::escapeId
413 * @dataProvider provideEscapeId
414 * @covers Sanitizer::escapeId
416 public function testEscapeId( $input, $output ) {
// Calls Sanitizer::escapeId( $input, ... ) with the option list [ 'noninitial', 'legacy' ].
// NOTE(review): the assertion call that presumably compares the result with $output is not
// visible in this excerpt.
419 Sanitizer
::escapeId( $input, [ 'noninitial', 'legacy' ] )
423 public static function provideEscapeId() {
// Data provider for testEscapeId( $input, $output ); rows read [ input, expected id ]. In
// the visible rows, characters such as '&', '/' and ';' appear in the expected value as a
// dot followed by two hex digits, and spaces appear as underscores.
// NOTE(review): only the last two rows are visible in this excerpt, and the input of the
// second row may have been partially entity-decoded - confirm against the full file.
440 [ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
441 [ 'A&B&C&amp;D&amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
446 * Test escapeIdReferenceList for consistency with escapeIdForAttribute
448 * @dataProvider provideEscapeIdReferenceList
449 * @covers Sanitizer::escapeIdReferenceList
451 public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
// Compares Sanitizer::escapeIdReferenceList( $referenceList ) with a string built by
// concatenating Sanitizer::escapeIdForAttribute( $id1 ) and
// Sanitizer::escapeIdForAttribute( $id2 ).
// NOTE(review): the assertion call and the separator concatenated between the two escaped
// ids (presumably a single space) are not visible in this excerpt - confirm.
453 Sanitizer
::escapeIdReferenceList( $referenceList ),
454 Sanitizer
::escapeIdForAttribute( $id1 )
456 . Sanitizer
::escapeIdForAttribute( $id2 )
460 public static function provideEscapeIdReferenceList() {
// Data provider for testEscapeIdReferenceList: each row holds a space-separated reference
// list followed by the two individual ids it is made of.
// NOTE(review): the return statement wrapping these rows is not visible in this excerpt.
461 /** [ <reference list>, <individual id 1>, <individual id 2> ] */
463 [ 'foo bar', 'foo', 'bar' ],
464 [ '#1 #2', '#1', '#2' ],
465 [ '+1 +2', '+1', '+2' ],
470 * @dataProvider provideIsReservedDataAttribute
471 * @covers Sanitizer::isReservedDataAttribute
473 public function testIsReservedDataAttribute( $attr, $expected ) {
// Uses assertSame, so Sanitizer::isReservedDataAttribute( $attr ) must return a value
// identical in type and value to $expected (the provider rows supply booleans).
474 $this->assertSame( $expected, Sanitizer
::isReservedDataAttribute( $attr ) );
477 public static function provideIsReservedDataAttribute() {
// Data provider for testIsReservedDataAttribute: rows read [ attribute name, expected
// boolean ]. In the visible rows only 'data-foo' is expected to be non-reserved; the names
// starting with data-ooui, data-parsoid and data-mw are expected to be reserved.
// NOTE(review): some rows and the return statement are not visible in this excerpt.
481 [ 'data-foo', false ],
483 [ 'data-ooui', true ],
484 [ 'data-parsoid', true ],
485 [ 'data-mw-foo', true ],
486 [ 'data-ooui-foo', true ],
487 [ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
492 * @dataProvider provideEscapeIdForStuff
494 * @covers Sanitizer::escapeIdForAttribute()
495 * @covers Sanitizer::escapeIdForLink()
496 * @covers Sanitizer::escapeIdForExternalInterwiki()
497 * @covers Sanitizer::escapeIdInternal()
499 * @param string $stuff
500 * @param string[] $config
502 * @param string|false $expected
503 * @param int|null $mode
505 public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
// Builds the static method name "Sanitizer::escapeIdFor<stuff>" from $stuff and invokes it
// via call_user_func with ( $id, $mode ). Before the call, the last element of $config is
// popped off and installed as wgExternalInterwikiFragmentMode; whatever remains of $config
// becomes wgFragmentMode. The result is then compared with $expected.
// NOTE(review): $mode is always passed as a second argument, including for the target
// methods whose provider rows do not supply one (it is then null).
506 $func = "Sanitizer::escapeIdFor{$stuff}";
507 $iwFlavor = array_pop( $config );
508 $this->setMwGlobals( [
509 'wgFragmentMode' => $config,
510 'wgExternalInterwikiFragmentMode' => $iwFlavor,
512 $escaped = call_user_func( $func, $id, $mode );
513 self
::assertEquals( $expected, $escaped );
516 public function provideEscapeIdForStuff() {
// Data provider for testEscapeIdForStuff. Each row reads
// [ method suffix, settings array, input text, expected result, optional mode constant ],
// where the method suffix is appended to "Sanitizer::escapeIdFor" by the test. An expected
// value of false appears on the ID_FALLBACK rows whose settings list only one fragment mode.
// NOTE(review): the input and HTML5-encoded literals below may have been partially
// entity-decoded in this excerpt, and the return statement is not visible - confirm against
// the full file before editing them.
517 // Test inputs and outputs
518 $text = 'foo тест_#%!\'()[]:<>&&&amp;';
519 $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
520 '.26.26amp.3B.26amp.3Bamp.3B';
521 $html5Encoded = 'foo_тест_#%!\'()[]:<>&&&amp;';
523 // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
524 $legacy = [ 'legacy', 'legacy' ];
525 $legacyNew = [ 'legacy', 'html5', 'legacy' ];
526 $newLegacy = [ 'html5', 'legacy', 'legacy' ];
527 $new = [ 'html5', 'legacy' ];
528 $allNew = [ 'html5', 'html5' ];
531 // Pure legacy: how MW worked before 2017
532 [ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer
::ID_PRIMARY
],
533 [ 'Attribute', $legacy, $text, false, Sanitizer
::ID_FALLBACK
],
534 [ 'Link', $legacy, $text, $legacyEncoded ],
535 [ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],
537 // Transition to a new world: legacy links with HTML5 fallback
538 [ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer
::ID_PRIMARY
],
539 [ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer
::ID_FALLBACK
],
540 [ 'Link', $legacyNew, $text, $legacyEncoded ],
541 [ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],
543 // New world: HTML5 links, legacy fallbacks
544 [ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer
::ID_PRIMARY
],
545 [ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer
::ID_FALLBACK
],
546 [ 'Link', $newLegacy, $text, $html5Encoded ],
547 [ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],
549 // Distant future: no legacy fallbacks, but still linking to legacy wikis
550 [ 'Attribute', $new, $text, $html5Encoded, Sanitizer
::ID_PRIMARY
],
551 [ 'Attribute', $new, $text, false, Sanitizer
::ID_FALLBACK
],
552 [ 'Link', $new, $text, $html5Encoded ],
553 [ 'ExternalInterwiki', $new, $text, $legacyEncoded ],
555 // Just before the heat death of universe: external interwikis are also HTML5 \m/
556 [ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer
::ID_PRIMARY
],
557 [ 'Attribute', $allNew, $text, false, Sanitizer
::ID_FALLBACK
],
558 [ 'Link', $allNew, $text, $html5Encoded ],
559 [ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
564 * @dataProvider provideStripAllTags
566 * @covers Sanitizer::stripAllTags()
567 * @covers RemexStripTagHandler
569 * @param string $input
570 * @param string $expected
572 public function testStripAllTags( $input, $expected ) {
// Asserts that Sanitizer::stripAllTags( $input ) equals $expected.
573 $this->assertEquals( $expected, Sanitizer
::stripAllTags( $input ) );
576 public function provideStripAllTags() {
// Data provider for testStripAllTags( $input, $expected ); rows read
// [ input HTML, expected plain text ]. In the visible rows, adjacent paragraphs and table
// cells are expected to be separated by a single space, while inline spans are expected to
// leave no separator.
// NOTE(review): the row with the nested small/a elements has lost its expected value in
// this excerpt, and the 'Hello ... world' row looks partially entity-decoded - confirm
// against the full file. The return statement is not visible.
578 [ '<p>Foo</p>', 'Foo' ],
579 [ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
580 [ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
581 [ '<p>Hello <strong> world café</p>', 'Hello <strong> world café' ],
583 '<p><small data-foo=\'bar"<baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
586 [ '1<span class="<?php">2</span>3', '123' ],
587 [ '1<span class="<?">2</span>3', '123' ],
588 [ '<th>1</th><td>2</td>', '1 2' ],
593 * @expectedException InvalidArgumentException
594 * @covers Sanitizer::escapeIdInternal()
596 public function testInvalidFragmentThrows() {
// Sets wgFragmentMode to a list containing the unknown mode 'boom!' and calls
// Sanitizer::escapeIdForAttribute(); the @expectedException annotation requires the call to
// throw InvalidArgumentException, so no explicit assertion follows.
597 $this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
598 Sanitizer
::escapeIdForAttribute( 'This should throw' );
602 * @expectedException UnexpectedValueException
603 * @covers Sanitizer::escapeIdForAttribute()
605 public function testNoPrimaryFragmentModeThrows() {
// Sets wgFragmentMode to an array whose only entry sits at key 666, so there is no entry at
// index 0, then calls Sanitizer::escapeIdForAttribute(); the @expectedException annotation
// requires the call to throw UnexpectedValueException.
606 $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
607 Sanitizer
::escapeIdForAttribute( 'This should throw' );
611 * @expectedException UnexpectedValueException
612 * @covers Sanitizer::escapeIdForLink()
614 public function testNoPrimaryFragmentModeThrows2() {
// Same setup as testNoPrimaryFragmentModeThrows (wgFragmentMode has no entry at index 0),
// but exercises Sanitizer::escapeIdForLink(); the @expectedException annotation requires
// the call to throw UnexpectedValueException.
615 $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
616 Sanitizer
::escapeIdForLink( 'This should throw' );