Some bugzilla.wikimedia.org -> phabricator.wikimedia.org changes
[lhc/web/wiklou.git] / tests / phpunit / includes / HtmlFormatterTest.php
1 <?php
2
/**
 * Tests for HtmlFormatter: content filtering (element removal), tag
 * flattening and the fast path that avoids parsing the HTML at all.
 *
 * @group HtmlFormatter
 */
class HtmlFormatterTest extends MediaWikiTestCase {

	/**
	 * Use TidySupport to check whether we should use $wgTidyInternal.
	 *
	 * The Tidy extension in HHVM does not support error text return, so it is
	 * nominally usable, but does not pass tests which require error text from
	 * Tidy.
	 */
	protected function setUp() {
		parent::setUp();
		$tidySupport = new TidySupport();
		$this->setMwGlobals( 'wgTidyInternal', $tidySupport->isInternal() );
	}

	/**
	 * Runs one HTML snippet through HtmlFormatter and checks both the
	 * remaining text and the list of elements reported as removed.
	 *
	 * @dataProvider getHtmlData
	 *
	 * @param string $input HTML snippet fed to the formatter
	 * @param string $expectedText Expected result of getText() after filtering
	 * @param array $expectedRemoved Expected serialization of each removed element
	 * @param callable|bool $callback Receives the formatter so it can configure
	 *  removal/flattening rules before filterContent() runs; false for none
	 */
	public function testTransform( $input, $expectedText,
		$expectedRemoved = array(), $callback = false
	) {
		$input = self::normalize( $input );
		$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
		if ( $callback ) {
			$callback( $formatter );
		}
		$removedElements = $formatter->filterContent();
		$html = $formatter->getText();
		$removed = array();
		foreach ( $removedElements as $removedElement ) {
			$removed[] = self::normalize( $formatter->getText( $removedElement ) );
		}
		// Array callable instead of the 'self::normalize' string form, which is
		// deprecated as of PHP 8.2.
		$expectedRemoved = array_map( array( __CLASS__, 'normalize' ), $expectedRemoved );

		$this->assertValidHtmlSnippet( $html );
		$this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );

		// The order in which elements are removed is not part of the contract,
		// so compare sorted lists. sort() works in place and returns a bool, so
		// its return value must never be handed to assertEquals() directly
		// (that would only compare true with true).
		// NOTE(review): the removed-element expectations in getHtmlData() were
		// not effectively checked before this comparison was made real; confirm
		// that they match what getText() serializes (e.g. '<div class="baz"/>').
		sort( $expectedRemoved );
		sort( $removed );
		$this->assertEquals( $expectedRemoved, $removed );
	}

	/**
	 * Strips all line breaks so that comparisons do not depend on the
	 * platform's newline convention.
	 *
	 * @param string $s
	 * @return string $s without any "\r" or "\n" characters
	 */
	private static function normalize( $s ) {
		return str_replace( "\n", '',
			str_replace( "\r", '', $s ) // "yay" to Windows!
		);
	}

	/**
	 * Data provider for testTransform().
	 *
	 * @return array[] Each case is: input HTML, expected text, and optionally
	 *  the expected removed elements and a callback configuring the formatter
	 */
	public static function getHtmlData() {
		$removeImages = function ( HtmlFormatter $f ) {
			$f->setRemoveMedia();
		};
		$removeTags = function ( HtmlFormatter $f ) {
			$f->remove( array( 'table', '.foo', '#bar', 'div.baz' ) );
		};
		$flattenSomeStuff = function ( HtmlFormatter $f ) {
			$f->flatten( array( 's', 'div' ) );
		};
		$flattenEverything = function ( HtmlFormatter $f ) {
			$f->flattenAllTags();
		};
		return array(
			// remove images if asked
			array(
				'<img src="/foo/bar.jpg" alt="Blah"/>',
				'',
				array( '<img src="/foo/bar.jpg" alt="Blah">' ),
				$removeImages,
			),
			// basic tag removal
			array(
				// @codingStandardsIgnoreStart Ignore long line warnings.
				'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
				// @codingStandardsIgnoreEnd
				'<div class="notfoo">test</div>
<span class="baz">baz</span>',
				array(
					'<table><tr><td>foo</td></tr></table>',
					'<div class="foo">foo</div>',
					'<div class="foo quux">foo</div>',
					'<span id="bar">bar</span>',
					'<strong class="foo" id="bar">foobar</strong>',
					'<div class="baz"/>',
				),
				$removeTags,
			),
			// don't flatten tags that start like chosen ones
			// (<span> starts with "s" but must survive flattening of <s>)
			array(
				'<div><s>foo</s> <span>bar</span></div>',
				'foo <span>bar</span>',
				array(),
				$flattenSomeStuff,
			),
			// total flattening
			array(
				'<div style="foo">bar<sup>2</sup></div>',
				'bar2',
				array(),
				$flattenEverything,
			),
			// UTF-8 preservation and security
			array(
				'<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&#38;&#0038;&#x26;&#x026;',
				'<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&amp;&amp;&amp;&amp;',
				array(),
				$removeTags, // Have some rules to trigger a DOM parse
			),
			// https://phabricator.wikimedia.org/T55086
			array(
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
			),
		);
	}

	/**
	 * With no filtering rules configured, filterContent() must not ask for
	 * the DOM document (tracked by MockHtmlFormatter::$hasDoc).
	 */
	public function testQuickProcessing() {
		$f = new MockHtmlFormatter( 'foo' );
		$f->filterContent();
		$this->assertFalse( $f->hasDoc, 'HtmlFormatter should not needlessly parse HTML' );
	}
}
133
/**
 * HtmlFormatter subclass used by the tests to detect whether the DOM
 * document was ever requested.
 */
class MockHtmlFormatter extends HtmlFormatter {
	/** @var bool Becomes true as soon as getDoc() has been called */
	public $hasDoc = false;

	/**
	 * Flags the access, then defers to the real implementation.
	 *
	 * @return mixed Whatever HtmlFormatter::getDoc() returns
	 */
	public function getDoc() {
		// Set the flag before delegating so the call is recorded even if the
		// parent implementation fails.
		$this->hasDoc = true;
		$document = parent::getDoc();

		return $document;
	}
}