Add a test for mixed /*@noflip*/ and /*@embed*/ CSS annotations
[lhc/web/wiklou.git] / tests / phpunit / includes / SanitizerTest.php
1 <?php
2
/**
 * Unit tests for Sanitizer: character reference decoding, HTML tag and
 * attribute sanitization, and inline CSS checking.
 *
 * @todo Tests covering decodeCharReferences can be refactored into a single
 * method and dataprovider.
 */
class SanitizerTest extends MediaWikiTestCase {

	/**
	 * Run the parent fixture setup, then explicitly load the Sanitizer class
	 * through the autoloader before each test.
	 */
	protected function setUp() {
		parent::setUp();

		AutoLoader::loadClass( 'Sanitizer' );
	}

	/**
	 * A named entity is decoded to its UTF-8 byte sequence.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNamedEntities() {
		$this->assertEquals(
			"\xc3\xa9cole",
			Sanitizer::decodeCharReferences( '&eacute;cole' ),
			'decode named entities'
		);
	}

	/**
	 * Hexadecimal and decimal numeric entities are decoded to UTF-8.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeNumericEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&#233;cole!" ),
			'decode numeric entities'
		);
	}

	/**
	 * Numeric and named entities can be mixed within one string.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole!",
			Sanitizer::decodeCharReferences( "&#x108;io bonas dans l'&eacute;cole!" ),
			'decode mixed numeric/named entities'
		);
	}

	/**
	 * Decoding happens exactly once: an escaped ampersand followed by entity
	 * text yields the literal entity text, not the decoded character.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testDecodeMixedComplexEntities() {
		$this->assertEquals(
			"\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)",
			Sanitizer::decodeCharReferences(
				"&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)"
			),
			'decode mixed complex entities'
		);
	}

	/**
	 * A bare ampersand that does not start an entity is left untouched.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidAmpersand() {
		$this->assertEquals(
			'a & b',
			Sanitizer::decodeCharReferences( 'a & b' ),
			'Invalid ampersand'
		);
	}

	/**
	 * An unknown named entity is passed through unchanged.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidEntities() {
		$this->assertEquals(
			'&foo;',
			Sanitizer::decodeCharReferences( '&foo;' ),
			'Invalid named entity'
		);
	}

	/**
	 * An out-of-range numeric entity decodes to UTF8_REPLACEMENT.
	 *
	 * @covers Sanitizer::decodeCharReferences
	 */
	public function testInvalidNumberedEntities() {
		$this->assertEquals(
			UTF8_REPLACEMENT,
			Sanitizer::decodeCharReferences( "&#88888888888888;" ),
			'Invalid numbered entity'
		);
	}

	/**
	 * With tidy disabled, an HTML5 tag is either escaped or kept and closed.
	 *
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 element (ie: 'video')
	 * @param bool $escaped Whether sanitizer let the tag in or escape it (ie: '&lt;video&gt;')
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );

		if ( $escaped ) {
			$expected = "&lt;$tag&gt;";
			$message = "<$tag> should be escaped";
		} else {
			// A tag that is let through gets its closing tag added.
			$expected = "<$tag></$tag>\n";
			$message = "<$tag> should be kept and closed";
		}

		$this->assertEquals(
			$expected,
			Sanitizer::removeHTMLtags( "<$tag>" ),
			$message
		);
	}

	/**
	 * Provide HTML5 tags
	 *
	 * @return array Rows of array( <tag>, <escaped> )
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; # We want tag to be escaped
		$VERBATIM = false; # We want to keep the tag
		return array(
			array( 'data', $VERBATIM ),
			array( 'mark', $VERBATIM ),
			array( 'time', $VERBATIM ),
			array( 'video', $ESCAPED ),
		);
	}

	/**
	 * Provide input/output pairs for Sanitizer::removeHTMLtags.
	 *
	 * Declared public static for consistency with the other providers in
	 * this class.
	 *
	 * @return array Rows of array( <input>, <expected output>, <message> )
	 */
	public static function dataRemoveHTMLtags() {
		return array(
			// former testSelfClosingTag
			array(
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			),
			// Make sure special nested HTML5 semantics are not broken
			// http://www.whatwg.org/html/text-level-semantics.html#the-kbd-element
			array(
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-sub-and-sup-elements
			array(
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			),
			// http://www.whatwg.org/html/text-level-semantics.html#the-dfn-element
			array(
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			),
		);
	}

	/**
	 * Sanitizer::removeHTMLtags produces the expected markup with tidy off.
	 *
	 * @dataProvider dataRemoveHTMLtags
	 * @covers Sanitizer::removeHTMLtags
	 *
	 * @param string $input Markup handed to the sanitizer
	 * @param string $output Expected sanitized markup
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		// Override the global through setMwGlobals(), as the HTML5 tag test
		// does, instead of writing to $GLOBALS directly: a raw write is never
		// undone and leaks the setting into whichever test runs next.
		$this->setMwGlobals( array(
			'wgUseTidy' => false
		) );

		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * An attribute string is decoded into the expected name => value map.
	 *
	 * @dataProvider provideTagAttributesToDecode
	 * @covers Sanitizer::decodeTagAttributes
	 *
	 * @param array $expected Expected attribute map
	 * @param string $attributes Raw attribute text
	 * @param string $message Assertion message
	 */
	public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::decodeTagAttributes( $attributes ),
			$message
		);
	}

	/**
	 * Provide attribute strings and the maps they should decode to.
	 *
	 * @return array Rows of array( <expected>, <attributes>, <message> )
	 */
	public static function provideTagAttributesToDecode() {
		return array(
			array( array( 'foo' => 'bar' ), 'foo=bar', 'Unquoted attribute' ),
			array( array( 'foo' => 'bar' ), ' foo = bar ', 'Spaced attribute' ),
			array( array( 'foo' => 'bar' ), 'foo="bar"', 'Double-quoted attribute' ),
			array( array( 'foo' => 'bar' ), 'foo=\'bar\'', 'Single-quoted attribute' ),
			// NOTE(review): the next three rows are byte-identical here;
			// presumably they were meant to differ in the whitespace between
			// the two attributes. Confirm against the upstream file.
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array(
				array( 'foo' => 'bar', 'baz' => 'foo' ),
				'foo=\'bar\' baz="foo"',
				'Several attributes'
			),
			array( array( ':foo' => 'bar' ), ':foo=\'bar\'', 'Leading :' ),
			array( array( '_foo' => 'bar' ), '_foo=\'bar\'', 'Leading _' ),
			array( array( 'foo' => 'bar' ), 'Foo=\'bar\'', 'Leading capital' ),
			array( array( 'foo' => 'BAR' ), 'FOO=BAR', 'Attribute keys are normalized to lowercase' ),

			# Invalid beginning
			array( array(), '-foo=bar', 'Leading - is forbidden' ),
			array( array(), '.foo=bar', 'Leading . is forbidden' ),

			# The same characters are fine after the first position
			array( array( 'foo-bar' => 'bar' ), 'foo-bar=bar', 'A - is allowed inside the attribute' ),
			array( array( 'foo-' => 'bar' ), 'foo-=bar', 'A - is allowed inside the attribute' ),
			array( array( 'foo.bar' => 'baz' ), 'foo.bar=baz', 'A . is allowed inside the attribute' ),
			array( array( 'foo.' => 'baz' ), 'foo.=baz', 'A . is allowed as last character' ),
			array( array( 'foo6' => 'baz' ), 'foo6=baz', 'Numbers are allowed' ),

			# This bit is more relaxed than XML rules, but some extensions use
			# it, like ProofreadPage (see bug 27539)
			array( array( '1foo' => 'baz' ), '1foo=baz', 'Leading numbers are allowed' ),

			# Symbols in attribute names make the attribute get dropped
			array( array(), 'foo$=baz', 'Symbols are not allowed' ),
			array( array(), 'foo@=baz', 'Symbols are not allowed' ),
			array( array(), 'foo~=baz', 'Symbols are not allowed' ),

			# Attribute values
			array(
				array( 'foo' => '1[#^`*%w/(' ),
				'foo=1[#^`*%w/(',
				'All kind of characters are allowed as values'
			),
			// The two messages below were previously swapped: this value is
			// double-quoted and contains a single quote.
			array(
				array( 'foo' => '1[#^`*%\'w/(' ),
				'foo="1[#^`*%\'w/("',
				'Single quotes are allowed if quoted by double quotes'
			),
			// This value is single-quoted and contains a double quote.
			array(
				array( 'foo' => '1[#^`*%"w/(' ),
				'foo=\'1[#^`*%"w/(\'',
				'Double quotes are allowed if quoted by single quotes'
			),
			array( array( 'foo' => '&"' ), 'foo=&amp;&quot;', 'Special chars can be provided as entities' ),
			array( array( 'foo' => '&foobar;' ), 'foo=&foobar;', 'Entity-like items are accepted' ),
		);
	}

	/**
	 * Deprecated presentational attributes come back from fixTagAttributes
	 * unchanged, with a single leading space.
	 *
	 * @dataProvider provideDeprecatedAttributes
	 * @covers Sanitizer::fixTagAttributes
	 *
	 * @param string $inputAttr Attribute text, e.g. 'clear="left"'
	 * @param string $inputEl Element name the attribute is attached to
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * Provide deprecated attribute/element combinations.
	 *
	 * @return array Rows of array( <attribute>, <element>, [message] )
	 */
	public static function provideDeprecatedAttributes() {
		/** array( <attribute>, <element>, [message] ) */
		return array(
			array( 'clear="left"', 'br' ),
			array( 'clear="all"', 'br' ),
			array( 'width="100"', 'td' ),
			array( 'nowrap="true"', 'td' ),
			array( 'nowrap=""', 'td' ),
			array( 'align="right"', 'td' ),
			array( 'align="center"', 'table' ),
			array( 'align="left"', 'tr' ),
			array( 'align="center"', 'div' ),
			array( 'align="left"', 'h1' ),
			array( 'align="left"', 'p' ),
		);
	}

	/**
	 * Sanitizer::checkCss returns the expected result for comment handling
	 * and insecure-input fixtures.
	 *
	 * @dataProvider provideCssCommentsFixtures
	 * @covers Sanitizer::checkCss
	 *
	 * @param string $expected Expected result of Sanitizer::checkCss
	 * @param string $css CSS handed to the checker
	 * @param string $message Assertion message
	 */
	public function testCssCommentsChecking( $expected, $css, $message = '' ) {
		$this->assertEquals( $expected,
			Sanitizer::checkCss( $css ),
			$message
		);
	}

	/**
	 * Provide CSS snippets and the output Sanitizer::checkCss should give.
	 *
	 * @return array Rows of array( <expected>, <css>, [message] )
	 */
	public static function provideCssCommentsFixtures() {
		/** array( <expected>, <css>, [message] ) */
		return array(
			// Valid comments spanning entire input
			array( '/**/', '/**/' ),
			array( '/* comment */', '/* comment */' ),
			// Weird stuff
			array( ' ', '/****/' ),
			array( ' ', '/* /* */' ),
			array( 'display: block;', "display:/* foo */block;" ),
			array( 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
				'Backslash-escaped comments must be stripped (bug 28450)' ),
			array( '', '/* unfinished comment structure',
				'Remove anything after a comment-start token' ),
			array( '', "\\2f\\2a unifinished comment'",
				'Remove anything after a backslash-escaped comment-start token' ),
			// Constructs that are expected to be rejected outright
			array(
				'/* insecure input */',
				'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\');'
			),
			array(
				'/* insecure input */',
				'-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
					. '(src=\'asdf.png\',sizingMethod=\'scale\')";'
			),
			array( '/* insecure input */', 'width: expression(1+1);' ),
			array( '/* insecure input */', 'background-image: image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: -moz-image(asdf.png);' ),
			array( '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ),
			array(
				'/* insecure input */',
				'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
			),
			array(
				'/* insecure input */',
				'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
			),
		);
	}

	/**
	 * Test for support or lack of support for specific attributes in the attribute whitelist.
	 *
	 * @return array Rows of array( <tag>, <attributes>, <expected>, <message> )
	 */
	public static function provideAttributeSupport() {
		/** array( <tag>, <attributes>, <expected>, <message> ) */
		return array(
			array(
				'div',
				' role="presentation"',
				' role="presentation"',
				'Support for WAI-ARIA\'s role="presentation".'
			),
			array( 'div', ' role="main"', '', "Other WAI-ARIA roles are currently not supported." ),
		);
	}

	/**
	 * fixTagAttributes keeps or drops each attribute as the fixture expects.
	 *
	 * @dataProvider provideAttributeSupport
	 * @covers Sanitizer::fixTagAttributes
	 *
	 * @param string $tag Element name
	 * @param string $attributes Raw attribute text
	 * @param string $expected Expected sanitized attribute text
	 * @param string $message Assertion message
	 */
	public function testAttributeSupport( $tag, $attributes, $expected, $message ) {
		$this->assertEquals( $expected,
			Sanitizer::fixTagAttributes( $attributes, $tag ),
			$message
		);
	}
}