4 * @todo Tests covering decodeCharReferences can be consolidated into a single
5 * test method driven by a data provider.
9 class SanitizerUnitTest
extends MediaWikiUnitTestCase
{
12 * @covers Sanitizer::decodeCharReferences
14 public function testDecodeNamedEntities() {
17 Sanitizer
::decodeCharReferences( 'école' ),
18 'decode named entities'
23 * @covers Sanitizer::decodeCharReferences
25 public function testDecodeNumericEntities() {
27 "\xc4\x88io bonas dans l'\xc3\xa9cole!",
28 Sanitizer
::decodeCharReferences( "Ĉio bonas dans l'école!" ),
29 'decode numeric entities'
34 * @covers Sanitizer::decodeCharReferences
36 public function testDecodeMixedEntities() {
38 "\xc4\x88io bonas dans l'\xc3\xa9cole!",
39 Sanitizer
::decodeCharReferences( "Ĉio bonas dans l'école!" ),
40 'decode mixed numeric/named entities'
45 * @covers Sanitizer::decodeCharReferences
47 public function testDecodeMixedComplexEntities() {
49 "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas Ĉio dans l'école)",
50 Sanitizer
::decodeCharReferences(
51 "Ĉio bonas dans l'école! (mais pas Ĉio dans l'école)"
53 'decode mixed complex entities'
58 * @covers Sanitizer::decodeCharReferences
60 public function testInvalidAmpersand() {
63 Sanitizer
::decodeCharReferences( 'a & b' ),
69 * @covers Sanitizer::decodeCharReferences
71 public function testInvalidEntities() {
74 Sanitizer
::decodeCharReferences( '&foo;' ),
75 'Invalid named entity'
80 * @covers Sanitizer::decodeCharReferences
82 public function testInvalidNumberedEntities() {
84 UtfNormal\Constants
::UTF8_REPLACEMENT
,
85 Sanitizer
::decodeCharReferences( "�" ),
86 'Invalid numbered entity'
91 * @dataProvider provideTagAttributesToDecode
92 * @covers Sanitizer::decodeTagAttributes
94 public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
95 $this->assertEquals( $expected,
96 Sanitizer
::decodeTagAttributes( $attributes ),
101 public static function provideTagAttributesToDecode() {
103 [ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
104 [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
105 [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
106 [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
107 [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
108 [ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
109 [ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
110 [ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
112 [ 'foo' => 'bar', 'baz' => 'foo' ],
113 'foo=\'bar\' baz="foo"',
117 [ 'foo' => 'bar', 'baz' => 'foo' ],
118 'foo=\'bar\' baz="foo"',
122 [ 'foo' => 'bar', 'baz' => 'foo' ],
123 'foo=\'bar\' baz="foo"',
126 [ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
127 [ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
128 [ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
129 [ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],
132 [ [], '-foo=bar', 'Leading - is forbidden' ],
133 [ [], '.foo=bar', 'Leading . is forbidden' ],
134 [ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
135 [ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed inside the attribute' ],
136 [ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
137 [ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
138 [ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],
140 # This bit is more relaxed than XML rules, but some extensions use
141 # it, like ProofreadPage (see T29539)
142 [ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
143 [ [], 'foo$=baz', 'Symbols are not allowed' ],
144 [ [], 'foo@=baz', 'Symbols are not allowed' ],
145 [ [], 'foo~=baz', 'Symbols are not allowed' ],
147 [ 'foo' => '1[#^`*%w/(' ],
149 'All kind of characters are allowed as values'
152 [ 'foo' => '1[#^`*%\'w/(' ],
153 'foo="1[#^`*%\'w/("',
154 'Double quotes are allowed if quoted by single quotes'
157 [ 'foo' => '1[#^`*%"w/(' ],
158 'foo=\'1[#^`*%"w/(\'',
159 'Single quotes are allowed if quoted by double quotes'
161 [ [ 'foo' => '&"' ], 'foo=&"', 'Special chars can be provided as entities' ],
162 [ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
167 * @dataProvider provideCssCommentsFixtures
168 * @covers Sanitizer::checkCss
170 public function testCssCommentsChecking( $expected, $css, $message = '' ) {
171 $this->assertEquals( $expected,
172 Sanitizer
::checkCss( $css ),
177 public static function provideCssCommentsFixtures() {
178 /** [ <expected>, <css>, [message] ] */
180 // Valid comments spanning entire input
182 [ '/* comment */', '/* comment */' ],
186 [ 'display: block;', "display:/* foo */block;" ],
187 [ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
188 'Backslash-escaped comments must be stripped (T30450)' ],
189 [ '', '/* unfinished comment structure',
190 'Remove anything after a comment-start token' ],
191 [ '', "\\2f\\2a unifinished comment'",
192 'Remove anything after a backslash-escaped comment-start token' ],
194 '/* insecure input */',
195 'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
196 . '(src=\'asdf.png\',sizingMethod=\'scale\');'
199 '/* insecure input */',
200 '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
201 . '(src=\'asdf.png\',sizingMethod=\'scale\')";'
203 [ '/* insecure input */', 'width: expression(1+1);' ],
204 [ '/* insecure input */', 'background-image: image(asdf.png);' ],
205 [ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
206 [ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
207 [ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
209 '/* insecure input */',
210 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
213 '/* insecure input */',
214 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
216 [ '/* insecure input */', 'foo: attr( title, url );' ],
217 [ '/* insecure input */', 'foo: attr( title url );' ],
218 [ '/* insecure input */', 'foo: var(--evil-attribute)' ],
223 * @dataProvider provideEscapeHtmlAllowEntities
224 * @covers Sanitizer::escapeHtmlAllowEntities
226 public function testEscapeHtmlAllowEntities( $expected, $html ) {
229 Sanitizer
::escapeHtmlAllowEntities( $html )
233 public static function provideEscapeHtmlAllowEntities() {
236 [ 'a¡b', 'a¡b' ],
237 [ 'foo'bar', "foo'bar" ],
238 [ '<script>foo</script>', '<script>foo</script>' ],
243 * Test Sanitizer::escapeId
245 * @dataProvider provideEscapeId
246 * @covers Sanitizer::escapeId
248 public function testEscapeId( $input, $output ) {
251 Sanitizer
::escapeId( $input, [ 'noninitial', 'legacy' ] )
255 public static function provideEscapeId() {
272 [ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
273 [ 'A&B&C&amp;D&amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
278 * @dataProvider provideIsReservedDataAttribute
279 * @covers Sanitizer::isReservedDataAttribute
281 public function testIsReservedDataAttribute( $attr, $expected ) {
282 $this->assertSame( $expected, Sanitizer
::isReservedDataAttribute( $attr ) );
285 public static function provideIsReservedDataAttribute() {
289 [ 'data-foo', false ],
291 [ 'data-ooui', true ],
292 [ 'data-parsoid', true ],
293 [ 'data-mw-foo', true ],
294 [ 'data-ooui-foo', true ],
295 [ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
300 * @dataProvider provideStripAllTags
302 * @covers Sanitizer::stripAllTags
303 * @covers RemexStripTagHandler
305 * @param string $input
306 * @param string $expected
308 public function testStripAllTags( $input, $expected ) {
309 $this->assertEquals( $expected, Sanitizer
::stripAllTags( $input ) );
312 public function provideStripAllTags() {
314 [ '<p>Foo</p>', 'Foo' ],
315 [ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
316 [ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
317 [ '<p>Hello <strong> world café</p>', 'Hello <strong> world café' ],
319 '<p><small data-foo=\'bar"<baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
322 [ '1<span class="<?php">2</span>3', '123' ],
323 [ '1<span class="<?">2</span>3', '123' ],
324 [ '<th>1</th><td>2</td>', '1 2' ],