Merge "Selenium: replace UserLoginPage with BlankPage where possible"
[lhc/web/wiklou.git] / includes / parser / RemexStripTagHandler.php
1 <?php
2
3 use RemexHtml\Tokenizer\Attributes;
4 use RemexHtml\Tokenizer\TokenHandler;
5 use RemexHtml\Tokenizer\Tokenizer;
6
/**
 * Token handler that reduces a tokenized HTML stream to plain text.
 *
 * Character data is accumulated as-is. Tags themselves are discarded, except
 * that a single space is appended for every start or end tag whose name is
 * listed in self::$BLOCK_LEVEL_TAGS, so that text from neighbouring blocks
 * does not run together. Doctypes, comments and tokenizer errors are ignored.
 *
 * @internal
 */
class RemexStripTagHandler implements TokenHandler {
	/** @var string Plain text collected so far */
	private $text = '';

	/**
	 * Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	 * The following is a complete list of all HTML block level elements
	 * (although "block-level" is not technically defined for elements that are
	 * new in HTML5).
	 * Structured as tag => true to allow O(1) membership test.
	 *
	 * @var bool[] Map of lower-case tag name => true
	 */
	private static $BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/**
	 * Get the text accumulated from all tokens handled so far.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Start-of-document callback. Intentionally a no-op.
	 *
	 * @param Tokenizer $t Unused
	 * @param mixed $fns Unused
	 * @param mixed $fn Unused
	 */
	public function startDocument( Tokenizer $t, $fns, $fn ) {
		// Do nothing.
	}

	/**
	 * End-of-document callback. Intentionally a no-op.
	 *
	 * @param int $pos Unused
	 */
	public function endDocument( $pos ) {
		// Do nothing.
	}

	/**
	 * Error callback. Errors are ignored; they do not affect the result.
	 *
	 * @param string $text Unused
	 * @param int $pos Unused
	 */
	public function error( $text, $pos ) {
		// Do nothing.
	}

	/**
	 * Append a run of character data to the result.
	 *
	 * Only the slice of $text selected by $start and $length is used.
	 * NOTE(review): substr() slices by byte; this presumes the tokenizer
	 * reports byte offsets -- confirm against the TokenHandler contract.
	 *
	 * @param string $text String containing the character data
	 * @param int $start Offset of the data within $text
	 * @param int $length Length of the data within $text
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * Start tag callback. The tag is dropped; see appendBlockSeparator().
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused
	 * @param bool $selfClose Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->appendBlockSeparator( $name );
	}

	/**
	 * End tag callback. The tag is dropped; see appendBlockSeparator().
	 *
	 * @param string $name Tag name
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->appendBlockSeparator( $name );
	}

	/**
	 * Doctype callback. Intentionally a no-op.
	 *
	 * @param string $name Unused
	 * @param string $public Unused
	 * @param string $system Unused
	 * @param bool $quirks Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		// Do nothing.
	}

	/**
	 * Comment callback. Comment contents are not part of the result.
	 *
	 * @param string $text Unused
	 * @param int $sourceStart Unused
	 * @param int $sourceLength Unused
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Do nothing.
	}

	/**
	 * Inject whitespace for typical block-level tags to
	 * prevent merging unrelated<br>words.
	 *
	 * Shared by startTag() and endTag(), so a block element contributes a
	 * space on both sides of its content.
	 *
	 * @param string $name Tag name as passed to the tag callback
	 */
	private function appendBlockSeparator( $name ) {
		if ( $this->isBlockLevelTag( $name ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Detect block level tags. Of course css can make anything a block
	 * level tag, but this is still better than nothing.
	 *
	 * The name is trimmed and lower-cased before the lookup, so matching is
	 * case-insensitive and tolerant of surrounding whitespace.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when tag is an html block level element
	 */
	private function isBlockLevelTag( $tagName ) {
		$key = strtolower( trim( $tagName ) );
		return isset( self::$BLOCK_LEVEL_TAGS[$key] );
	}
}