Merge "Exclude redirects from Special:Fewestrevisions"
[lhc/web/wiklou.git] / tests / phpunit / includes / parser / SanitizerTest.php
1 <?php
2
3 use Wikimedia\TestingAccessWrapper;
4
/**
 * Tests for Sanitizer: HTML tag removal, tag attribute validation and
 * fragment identifier escaping.
 *
 * @group Sanitizer
 */
class SanitizerTest extends MediaWikiTestCase {

	/**
	 * Destroy the MWTidy singleton after every test, since some tests in this
	 * class replace it through MWTidy::setInstance().
	 */
	protected function tearDown() {
		MWTidy::destroySingleton();
		parent::tearDown();
	}

	/**
	 * @covers Sanitizer::removeHTMLtags
	 * @dataProvider provideHtml5Tags
	 *
	 * @param string $tag Name of an HTML5 element (e.g. 'video')
	 * @param bool $escaped Whether the sanitizer is expected to escape the tag
	 *  (e.g. '&lt;video&gt;') instead of letting it through
	 */
	public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
		$this->hideDeprecated( 'disabling tidy' );
		$this->hideDeprecated( 'MWTidy::setInstance' );
		MWTidy::setInstance( false );

		// A tag that is let through comes back with its closing tag and a
		// trailing newline; any other tag comes back HTML-escaped.
		$expected = $escaped ? "&lt;$tag&gt;" : "<$tag></$tag>\n";

		$this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
	}

	/**
	 * Provide HTML5 tags
	 *
	 * @return array[] [ <tag name>, <whether the tag is expected to be escaped> ]
	 */
	public static function provideHtml5Tags() {
		$ESCAPED = true; // We want the tag to be escaped
		$VERBATIM = false; // We want to keep the tag
		return [
			[ 'data', $VERBATIM ],
			[ 'mark', $VERBATIM ],
			[ 'time', $VERBATIM ],
			[ 'video', $ESCAPED ],
		];
	}

	/**
	 * Provide input/output pairs for testRemoveHTMLtags().
	 *
	 * @return array[] [ <input HTML>, <expected output HTML>, <assertion message> ]
	 */
	public static function dataRemoveHTMLtags() {
		return [
			// former testSelfClosingTag
			[
				'<div>Hello world</div />',
				'<div>Hello world</div>',
				'Self-closing closing div'
			],
			// Make sure special nested HTML5 semantics are not broken
			// https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
			[
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
				'Nested <kbd>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
			[
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
				'Nested <var>.'
			],
			// https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
			[
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
				'<abbr> inside <dfn>',
			],
		];
	}

	/**
	 * @dataProvider dataRemoveHTMLtags
	 * @covers Sanitizer::removeHTMLtags
	 *
	 * @param string $input HTML passed to Sanitizer::removeHTMLtags()
	 * @param string $output Expected sanitized HTML
	 * @param string|null $msg Assertion message
	 */
	public function testRemoveHTMLtags( $input, $output, $msg = null ) {
		$this->hideDeprecated( 'disabling tidy' );
		$this->hideDeprecated( 'MWTidy::setInstance' );
		MWTidy::setInstance( false );
		$this->assertEquals( $output, Sanitizer::removeHTMLtags( $input ), $msg );
	}

	/**
	 * Check that the provided attributes come back from fixTagAttributes()
	 * unchanged, only prefixed with a single space.
	 *
	 * @dataProvider provideDeprecatedAttributes
	 * @covers Sanitizer::fixTagAttributes
	 * @covers Sanitizer::validateTagAttributes
	 * @covers Sanitizer::validateAttributes
	 *
	 * @param string $inputAttr Attribute in name="value" form
	 * @param string $inputEl Name of the element carrying the attribute
	 * @param string $message Assertion message
	 */
	public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
		$this->assertEquals( " $inputAttr",
			Sanitizer::fixTagAttributes( $inputAttr, $inputEl ),
			$message
		);
	}

	/**
	 * @return array[] [ <attribute>, <element>, [message] ]
	 */
	public static function provideDeprecatedAttributes() {
		return [
			[ 'clear="left"', 'br' ],
			[ 'clear="all"', 'br' ],
			[ 'width="100"', 'td' ],
			[ 'nowrap="true"', 'td' ],
			[ 'nowrap=""', 'td' ],
			[ 'align="right"', 'td' ],
			[ 'align="center"', 'table' ],
			[ 'align="left"', 'tr' ],
			[ 'align="center"', 'div' ],
			[ 'align="left"', 'h1' ],
			[ 'align="left"', 'p' ],
		];
	}

	/**
	 * @dataProvider provideValidateTagAttributes
	 * @covers Sanitizer::validateTagAttributes
	 * @covers Sanitizer::validateAttributes
	 *
	 * @param string $element Element name
	 * @param string[] $attribs Input attributes, as name => value
	 * @param string[] $expected Expected attributes after validation
	 */
	public function testValidateTagAttributes( $element, $attribs, $expected ) {
		$actual = Sanitizer::validateTagAttributes( $attribs, $element );
		$this->assertArrayEquals( $expected, $actual, false, true );
	}

	/**
	 * @return array[] [ <element>, <input attributes>, <expected attributes> ]
	 */
	public static function provideValidateTagAttributes() {
		return [
			[ 'math',
				[ 'id' => 'foo bar', 'bogus' => 'stripped', 'data-foo' => 'bar' ],
				[ 'id' => 'foo_bar', 'data-foo' => 'bar' ],
			],
			[ 'meta',
				[ 'id' => 'foo bar', 'itemprop' => 'foo', 'content' => 'bar' ],
				[ 'itemprop' => 'foo', 'content' => 'bar' ],
			],
		];
	}

	/**
	 * @dataProvider provideAttributeWhitelist
	 * @covers Sanitizer::attributeWhitelist
	 *
	 * @param string $element Element name
	 * @param string[] $attribs Attribute names expected for the element
	 */
	public function testAttributeWhitelist( $element, $attribs ) {
		$this->hideDeprecated( 'Sanitizer::attributeWhitelist' );
		$this->hideDeprecated( 'Sanitizer::setupAttributeWhitelist' );
		$actual = Sanitizer::attributeWhitelist( $element );
		$this->assertArrayEquals( $attribs, $actual );
	}

	/**
	 * @dataProvider provideAttributeWhitelist
	 * @covers Sanitizer::attributeWhitelistInternal
	 *
	 * @param string $element Element name
	 * @param string[] $attribs Attribute names expected as keys of the result
	 */
	public function testAttributeWhitelistInternal( $element, $attribs ) {
		// The method is reached through TestingAccessWrapper rather than called directly.
		$sanitizer = TestingAccessWrapper::newFromClass( Sanitizer::class );
		$actual = $sanitizer->attributeWhitelistInternal( $element );
		$this->assertArrayEquals( $attribs, array_keys( $actual ) );
	}

	/**
	 * @return array[] [ <element>, [ <good attribute 1>, <good attribute 2>, ...] ]
	 */
	public static function provideAttributeWhitelist() {
		return [
			[ 'math', [ 'class', 'style', 'id', 'title' ] ],
			[ 'meta', [ 'itemprop', 'content' ] ],
			[ 'link', [ 'itemprop', 'href', 'title' ] ],
		];
	}

	/**
	 * @dataProvider provideEscapeIdForStuff
	 *
	 * @covers Sanitizer::escapeIdForAttribute()
	 * @covers Sanitizer::escapeIdForLink()
	 * @covers Sanitizer::escapeIdForExternalInterwiki()
	 * @covers Sanitizer::escapeIdInternal()
	 *
	 * @param string $stuff Suffix of the Sanitizer::escapeIdFor* method to call
	 * @param string[] $config Fragment modes: the last element is used as
	 *  $wgExternalInterwikiFragmentMode, the remaining ones as $wgFragmentMode
	 * @param string $id ID to escape
	 * @param string|false $expected Expected return value
	 * @param int|null $mode Passed to the method as its second argument
	 */
	public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
		$func = "Sanitizer::escapeIdFor{$stuff}";
		// Split off the interwiki flavor; what is left in $config is $wgFragmentMode.
		$iwFlavor = array_pop( $config );
		$this->setMwGlobals( [
			'wgFragmentMode' => $config,
			'wgExternalInterwikiFragmentMode' => $iwFlavor,
		] );
		$escaped = call_user_func( $func, $id, $mode );
		$this->assertEquals( $expected, $escaped );
	}

	/**
	 * @return array[] [ <method suffix>, <fragment mode settings>, <id>,
	 *  <expected result>, [mode] ]
	 */
	public static function provideEscapeIdForStuff() {
		// Test inputs and outputs
		$text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
		$legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
			'.26.26amp.3B.26amp.3Bamp.3B';
		$html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';

		// Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
		$legacy = [ 'legacy', 'legacy' ];
		$legacyNew = [ 'legacy', 'html5', 'legacy' ];
		$newLegacy = [ 'html5', 'legacy', 'legacy' ];
		$new = [ 'html5', 'legacy' ];
		$allNew = [ 'html5', 'html5' ];

		return [
			// Pure legacy: how MW worked before 2017
			[ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacy, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],

			// Transition to a new world: legacy links with HTML5 fallback
			[ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $legacyNew, $text, $legacyEncoded ],
			[ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],

			// New world: HTML5 links, legacy fallbacks
			[ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
			[ 'Link', $newLegacy, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],

			// Distant future: no legacy fallbacks, but still linking to legacy wikis
			[ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $new, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $new, $text, $legacyEncoded ],

			// Just before the heat death of the universe: external interwikis are also HTML5 \m/
			[ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
			[ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
			[ 'Link', $allNew, $text, $html5Encoded ],
			[ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
		];
	}

	/**
	 * An unrecognized value in $wgFragmentMode must be rejected.
	 *
	 * @covers Sanitizer::escapeIdInternal()
	 */
	public function testInvalidFragmentThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );

		// Declared after the setup so that only the call under test may throw.
		$this->expectException( InvalidArgumentException::class );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * $wgFragmentMode without an entry at index 0 must be rejected.
	 *
	 * @covers Sanitizer::escapeIdForAttribute()
	 */
	public function testNoPrimaryFragmentModeThrows() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );

		// Declared after the setup so that only the call under test may throw.
		$this->expectException( UnexpectedValueException::class );
		Sanitizer::escapeIdForAttribute( 'This should throw' );
	}

	/**
	 * Same as testNoPrimaryFragmentModeThrows(), but for escapeIdForLink().
	 *
	 * @covers Sanitizer::escapeIdForLink()
	 */
	public function testNoPrimaryFragmentModeThrows2() {
		$this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );

		// Declared after the setup so that only the call under test may throw.
		$this->expectException( UnexpectedValueException::class );
		Sanitizer::escapeIdForLink( 'This should throw' );
	}

	/**
	 * Test escapeIdReferenceList for consistency with escapeIdForAttribute
	 *
	 * @dataProvider provideEscapeIdReferenceList
	 * @covers Sanitizer::escapeIdReferenceList
	 *
	 * @param string $referenceList Space-separated list of two IDs
	 * @param string $id1 First ID of the list
	 * @param string $id2 Second ID of the list
	 */
	public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
		// The reference value is built from the individually escaped IDs, so
		// it goes first (as "expected"); the list method is the code under test.
		$expected = Sanitizer::escapeIdForAttribute( $id1 )
			. ' '
			. Sanitizer::escapeIdForAttribute( $id2 );

		$this->assertEquals(
			$expected,
			Sanitizer::escapeIdReferenceList( $referenceList )
		);
	}

	/**
	 * @return array[] [ <reference list>, <individual id 1>, <individual id 2> ]
	 */
	public static function provideEscapeIdReferenceList() {
		return [
			[ 'foo bar', 'foo', 'bar' ],
			[ '#1 #2', '#1', '#2' ],
			[ '+1 +2', '+1', '+2' ],
		];
	}

}