Merge "Selenium: replace UserLoginPage with BlankPage where possible"
[lhc/web/wiklou.git] / tests / phpunit / unit / includes / parser / SanitizerUnitTest.php
1 <?php
2
3 /**
4 * @todo Tests covering decodeCharReferences can be refactored into a single
5 * method and dataprovider.
6 *
7 * @group Sanitizer
8 */
9 class SanitizerUnitTest extends MediaWikiUnitTestCase {
10
/**
 * A named entity (&eacute;) must come back as the UTF-8 bytes of "é".
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNamedEntities() {
	$decoded = Sanitizer::decodeCharReferences( '&eacute;cole' );

	self::assertEquals( "\xc3\xa9cole", $decoded, 'decode named entities' );
}
21
/**
 * Hexadecimal (&#x108;) and decimal (&#233;) references in the same
 * string must both be replaced by their UTF-8 encodings.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeNumericEntities() {
	$encoded = "&#x108;io bonas dans l'&#233;cole!";
	$wanted = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	self::assertEquals(
		$wanted,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode numeric entities'
	);
}
32
/**
 * A numeric reference and a named entity appearing together must both
 * be decoded.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole!";
	$wanted = "\xc4\x88io bonas dans l'\xc3\xa9cole!";

	self::assertEquals(
		$wanted,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed numeric/named entities'
	);
}
43
/**
 * Decoding happens exactly once: an escaped ampersand (&amp; or &#38;)
 * followed by entity-like text must yield the literal entity text, not
 * the character that entity would stand for.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testDecodeMixedComplexEntities() {
	$encoded = "&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)";
	$wanted = "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)";

	self::assertEquals(
		$wanted,
		Sanitizer::decodeCharReferences( $encoded ),
		'decode mixed complex entities'
	);
}
56
/**
 * A bare ampersand that does not start a reference is passed through
 * unchanged.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidAmpersand() {
	$text = 'a & b';

	self::assertEquals( $text, Sanitizer::decodeCharReferences( $text ), 'Invalid ampersand' );
}
67
/**
 * An unknown named entity is left exactly as written.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidEntities() {
	$text = '&foo;';

	self::assertEquals( $text, Sanitizer::decodeCharReferences( $text ), 'Invalid named entity' );
}
78
/**
 * A numeric reference far beyond the Unicode range must decode to the
 * UTF-8 replacement character constant.
 *
 * @covers Sanitizer::decodeCharReferences
 */
public function testInvalidNumberedEntities() {
	$decoded = Sanitizer::decodeCharReferences( "&#88888888888888;" );

	self::assertEquals(
		UtfNormal\Constants::UTF8_REPLACEMENT,
		$decoded,
		'Invalid numbered entity'
	);
}
89
/**
 * Decode a raw attribute string and compare the resulting map with the
 * expected one.
 *
 * @dataProvider provideTagAttributesToDecode
 * @covers Sanitizer::decodeTagAttributes
 *
 * @param array $expected Expected attribute name => value map
 * @param string $attributes Raw attribute text handed to the decoder
 * @param string $message Assertion message shown on failure
 */
public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
	$decoded = Sanitizer::decodeTagAttributes( $attributes );

	self::assertEquals( $expected, $decoded, $message );
}
100
/**
 * Data sets for testDecodeTagAttributes().
 *
 * @return array[] Each entry is
 *   [ <expected attribute map>, <raw attribute text>, <assertion message> ]
 */
public static function provideTagAttributesToDecode() {
	return [
		[ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
		[ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
		[ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
		[ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
		// "n" followed by U+0301 COMBINING ACUTE ACCENT, written as raw
		// UTF-8 bytes so that an editor or tool normalizing the file to
		// NFC cannot silently turn it into the precomposed letter.
		[ [], "n\xcc\x81gh=bar", 'Combining accent is not allowed' ],
		[ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
		[ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
		[ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],

		# Several attributes, one data set per separator. The separators are
		# written as escapes so they stay distinguishable in the source.
		# NOTE(review): assumes tab and newline are accepted as attribute
		# separators just like a space -- confirm against
		# Sanitizer::decodeTagAttributes.
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			'foo=\'bar\' baz="foo"',
			'Several attributes'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			"foo='bar'\tbaz=\"foo\"",
			'Several attributes separated by a tab'
		],
		[
			[ 'foo' => 'bar', 'baz' => 'foo' ],
			"foo='bar'\nbaz=\"foo\"",
			'Several attributes separated by a newline'
		],

		[ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
		[ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
		[ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
		[ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],

		# Invalid beginning
		[ [], '-foo=bar', 'Leading - is forbidden' ],
		[ [], '.foo=bar', 'Leading . is forbidden' ],

		# The same characters are fine anywhere after the first position
		[ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
		[ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed as last character' ],
		[ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
		[ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
		[ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],

		# This bit is more relaxed than XML rules, but some extensions use
		# it, like ProofreadPage (see T29539)
		[ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
		[ [], 'foo$=baz', 'Symbols are not allowed' ],
		[ [], 'foo@=baz', 'Symbols are not allowed' ],
		[ [], 'foo~=baz', 'Symbols are not allowed' ],
		[
			[ 'foo' => '1[#^`*%w/(' ],
			'foo=1[#^`*%w/(',
			'All kind of characters are allowed as values'
		],
		[
			// The value contains a single quote and is wrapped in double quotes
			[ 'foo' => '1[#^`*%\'w/(' ],
			'foo="1[#^`*%\'w/("',
			'Single quotes are allowed if quoted by double quotes'
		],
		[
			// The value contains a double quote and is wrapped in single quotes
			[ 'foo' => '1[#^`*%"w/(' ],
			'foo=\'1[#^`*%"w/(\'',
			'Double quotes are allowed if quoted by single quotes'
		],
		[ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
		[ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
	];
}
165
/**
 * Run a CSS snippet through checkCss() and compare the result with the
 * expected output.
 *
 * @dataProvider provideCssCommentsFixtures
 * @covers Sanitizer::checkCss
 *
 * @param string $expected Expected result of the check
 * @param string $css CSS text handed to the checker
 * @param string $message Assertion message shown on failure
 */
public function testCssCommentsChecking( $expected, $css, $message = '' ) {
	$checked = Sanitizer::checkCss( $css );

	self::assertEquals( $expected, $checked, $message );
}
176
/**
 * Data sets for testCssCommentsChecking().
 *
 * @return array[] Each entry is [ <expected>, <css>, [message] ]
 */
public static function provideCssCommentsFixtures() {
	// Expected output shared by every rejected snippet below
	$insecure = '/* insecure input */';

	$fixtures = [
		// Valid comments spanning entire input
		[ '/**/', '/**/' ],
		[ '/* comment */', '/* comment */' ],

		// Weird stuff
		[ ' ', '/****/' ],
		[ ' ', '/* /* */' ],
		[ 'display: block;', "display:/* foo */block;" ],
		[
			'display: block;',
			"display:\\2f\\2a foo \\2a\\2f block;",
			'Backslash-escaped comments must be stripped (T30450)'
		],
		[
			'',
			'/* unfinished comment structure',
			'Remove anything after a comment-start token'
		],
		[
			'',
			"\\2f\\2a unifinished comment'",
			'Remove anything after a backslash-escaped comment-start token'
		],

		// Constructs expected to be replaced wholesale
		[
			$insecure,
			'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\');'
		],
		[
			$insecure,
			'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
				. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
		],
		[ $insecure, 'width: expression(1+1);' ],
		[ $insecure, 'background-image: image(asdf.png);' ],
		[ $insecure, 'background-image: -webkit-image(asdf.png);' ],
		[ $insecure, 'background-image: -moz-image(asdf.png);' ],
		[ $insecure, 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);' ],
		[ $insecure, 'foo: attr( title, url );' ],
		[ $insecure, 'foo: attr( title url );' ],
		[ $insecure, 'foo: var(--evil-attribute)' ],
	];

	return $fixtures;
}
221
/**
 * Escape an HTML fragment and compare it with the expected text.
 *
 * @dataProvider provideEscapeHtmlAllowEntities
 * @covers Sanitizer::escapeHtmlAllowEntities
 *
 * @param string $expected Expected escaped output
 * @param string $html Input handed to the escaper
 */
public function testEscapeHtmlAllowEntities( $expected, $html ) {
	$escaped = Sanitizer::escapeHtmlAllowEntities( $html );

	self::assertEquals( $expected, $escaped );
}
232
/**
 * Data sets for testEscapeHtmlAllowEntities().
 *
 * @return array[] Each entry is [ <expected output>, <input html> ]
 */
public static function provideEscapeHtmlAllowEntities() {
	$cases = [];

	// Plain text
	$cases[] = [ 'foo', 'foo' ];
	// Numeric character reference in the input
	$cases[] = [ 'a¡b', 'a&#161;b' ];
	// Apostrophe in the input
	$cases[] = [ 'foo&#039;bar', "foo'bar" ];
	// Markup in the input
	$cases[] = [ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ];

	return $cases;
}
241
/**
 * Escape an ID with the 'noninitial' and 'legacy' options and compare
 * it with the expected encoding.
 *
 * @dataProvider provideEscapeId
 * @covers Sanitizer::escapeId
 *
 * @param string $input Raw ID
 * @param string $output Expected escaped ID
 */
public function testEscapeId( $input, $output ) {
	$options = [ 'noninitial', 'legacy' ];
	$escaped = Sanitizer::escapeId( $input, $options );

	self::assertEquals( $output, $escaped );
}
254
/**
 * Data sets for testEscapeId().
 *
 * @return array[] Each entry is [ <raw id>, <expected escaped id> ]
 */
public static function provideEscapeId() {
	// Raw ID => expected escaped form, in the order the cases are run
	$escapedByInput = [
		'+' => '.2B',
		'&' => '.26',
		'=' => '.3D',
		':' => ':',
		';' => '.3B',
		'@' => '.40',
		'$' => '.24',
		'-_.' => '-_.',
		'!' => '.21',
		'*' => '.2A',
		'/' => '.2F',
		'[]' => '.5B.5D',
		'<>' => '.3C.3E',
		'\'' => '.27',
		'§' => '.C2.A7',
		'Test:A & B/Here' => 'Test:A_.26_B.2FHere',
		'A&B&amp;C&amp;amp;D&amp;amp;amp;E' => 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE',
	];

	$cases = [];
	foreach ( $escapedByInput as $rawId => $escapedId ) {
		// The cast keeps the input a string even if PHP turned the key into an integer
		$cases[] = [ (string)$rawId, $escapedId ];
	}

	return $cases;
}
276
/**
 * Test escapeIdReferenceList for consistency with escapeIdForAttribute.
 *
 * The escaped list must equal both IDs escaped one by one with
 * escapeIdForAttribute() and joined by a single space.
 *
 * @dataProvider provideEscapeIdReferenceList
 * @covers Sanitizer::escapeIdReferenceList
 *
 * @param string $referenceList Two IDs separated by a space
 * @param string $id1 First ID of the list
 * @param string $id2 Second ID of the list
 */
public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
	// escapeIdForAttribute() is the reference, so it builds the expected
	// value; the method under test supplies the actual value. This keeps
	// PHPUnit's "expected"/"actual" labels correct in failure output.
	$expected = Sanitizer::escapeIdForAttribute( $id1 )
		. ' '
		. Sanitizer::escapeIdForAttribute( $id2 );

	$this->assertEquals(
		$expected,
		Sanitizer::escapeIdReferenceList( $referenceList )
	);
}
291
/**
 * Data sets for testEscapeIdReferenceList().
 *
 * @return array[] Each entry is
 *   [ <reference list>, <individual id 1>, <individual id 2> ]
 */
public static function provideEscapeIdReferenceList() {
	$idPairs = [
		[ 'foo', 'bar' ],
		[ '#1', '#2' ],
		[ '+1', '+2' ],
	];

	// The reference list is the two IDs joined by one space
	return array_map(
		static function ( array $pair ) {
			list( $first, $second ) = $pair;
			return [ $first . ' ' . $second, $first, $second ];
		},
		$idPairs
	);
}
300
/**
 * Check the reserved/not-reserved verdict for an attribute name,
 * compared strictly (type and value).
 *
 * @dataProvider provideIsReservedDataAttribute
 * @covers Sanitizer::isReservedDataAttribute
 *
 * @param string $attr Attribute name
 * @param bool $expected Expected verdict
 */
public function testIsReservedDataAttribute( $attr, $expected ) {
	$verdict = Sanitizer::isReservedDataAttribute( $attr );

	self::assertSame( $expected, $verdict );
}
308
/**
 * Data sets for testIsReservedDataAttribute().
 *
 * @return array[] Each entry is [ <attribute name>, <expected verdict> ]
 */
public static function provideIsReservedDataAttribute() {
	$notReserved = [
		'foo',
		'data',
		'data-foo',
	];
	$reserved = [
		'data-mw',
		'data-ooui',
		'data-parsoid',
		'data-mw-foo',
		'data-ooui-foo',
		// could be false but this is how it's implemented currently
		'data-mwfoo',
	];

	$cases = [];
	foreach ( $notReserved as $name ) {
		$cases[] = [ $name, false ];
	}
	foreach ( $reserved as $name ) {
		$cases[] = [ $name, true ];
	}

	return $cases;
}
322
/**
 * Strip all markup from an HTML fragment and compare the remaining
 * text with the expected string.
 *
 * @dataProvider provideStripAllTags
 *
 * @covers Sanitizer::stripAllTags
 * @covers RemexStripTagHandler
 *
 * @param string $input HTML handed to the stripper
 * @param string $expected Expected plain text
 */
public function testStripAllTags( $input, $expected ) {
	$stripped = Sanitizer::stripAllTags( $input );

	self::assertEquals( $expected, $stripped );
}
335
/**
 * Data sets for testStripAllTags().
 *
 * Declared static like every other data provider in this class; newer
 * PHPUnit versions only accept static providers, and a static method
 * can still be invoked through an instance.
 *
 * @return array[] Each entry is [ <input html>, <expected plain text> ]
 */
public static function provideStripAllTags() {
	return [
		[ '<p>Foo</p>', 'Foo' ],
		[ '<p id="one">Foo</p><p id="two">Bar</p>', 'Foo Bar' ],
		[ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
		// Entities in the text
		[ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
		// Quotes and angle brackets inside an attribute value
		[
			'<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
			'Bar Whee!'
		],
		// PHP-like open tags inside an attribute value
		[ '1<span class="<?php">2</span>3', '123' ],
		[ '1<span class="<?">2</span>3', '123' ],
		// Table cells
		[ '<th>1</th><td>2</td>', '1 2' ],
	];
}
351
352 }