Merge "Improve docs for Title::getInternalURL/getCanonicalURL"
[lhc/web/wiklou.git] / includes / parser / RemexStripTagHandler.php
1 <?php
2
3 use RemexHtml\Tokenizer\Attributes;
4 use RemexHtml\Tokenizer\TokenHandler;
5 use RemexHtml\Tokenizer\Tokenizer;
6
/**
 * Remex token handler that throws away markup and keeps only character
 * data, accumulating it into a single plain-text string.
 *
 * A single space is emitted at the boundaries of block-level elements so
 * that words from neighbouring blocks do not run together once the tags
 * are gone.
 *
 * @internal
 */
class RemexStripTagHandler implements TokenHandler {
	/**
	 * Tags that are treated as block-level when stripping markup.
	 *
	 * Based on https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	 * as retrieved on Sept 12, 2018. Note that "block-level" is not
	 * technically defined for elements that are new in HTML5, and that
	 * <br> is not block-level but is deliberately included anyway.
	 *
	 * Stored as tag => true so membership can be tested with a key lookup.
	 *
	 * @var bool[]
	 */
	private static $BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	/** @var string Plain text gathered from the token stream so far */
	private $text = '';

	/**
	 * Get the text accumulated from all character tokens seen so far,
	 * including the spaces injected around block-level tags.
	 *
	 * @return string
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Start-of-document callback; intentionally ignored.
	 *
	 * @param Tokenizer $t
	 * @param mixed $fns Unused
	 * @param mixed $fn Unused
	 */
	public function startDocument( Tokenizer $t, $fns, $fn ) {
		// Nothing to prepare.
	}

	/**
	 * End-of-document callback; intentionally ignored.
	 *
	 * @param mixed $pos Unused
	 */
	public function endDocument( $pos ) {
		// Nothing to finalise.
	}

	/**
	 * Parse error callback; errors are silently discarded.
	 *
	 * @param mixed $text Unused
	 * @param mixed $pos Unused
	 */
	public function error( $text, $pos ) {
		// Errors are irrelevant when only text is being extracted.
	}

	/**
	 * Append a run of character data to the result.
	 *
	 * @param string $text Buffer containing the characters
	 * @param int $start Offset of the run within $text
	 * @param int $length Length of the run
	 * @param mixed $sourceStart Unused
	 * @param mixed $sourceLength Unused
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		$chunk = substr( $text, $start, $length );
		$this->text .= $chunk;
	}

	/**
	 * Start tag callback: the tag itself is dropped, but block-level
	 * tags leave a separating space behind.
	 *
	 * @param string $name Tag name
	 * @param Attributes $attrs Unused
	 * @param mixed $selfClose Unused
	 * @param mixed $sourceStart Unused
	 * @param mixed $sourceLength Unused
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->separateBlock( $name );
	}

	/**
	 * End tag callback: the tag itself is dropped, but block-level
	 * tags leave a separating space behind.
	 *
	 * @param string $name Tag name
	 * @param mixed $sourceStart Unused
	 * @param mixed $sourceLength Unused
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->separateBlock( $name );
	}

	/**
	 * Doctype callback; intentionally ignored.
	 *
	 * @param mixed $name Unused
	 * @param mixed $public Unused
	 * @param mixed $system Unused
	 * @param mixed $quirks Unused
	 * @param mixed $sourceStart Unused
	 * @param mixed $sourceLength Unused
	 */
	public function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
		// Doctypes contribute no text.
	}

	/**
	 * Comment callback; comment contents are not part of the result.
	 *
	 * @param mixed $text Unused
	 * @param mixed $sourceStart Unused
	 * @param mixed $sourceLength Unused
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Comments contribute no text.
	}

	/**
	 * Emit one space when the given tag is block-level, which keeps text
	 * on either side of the tag from being merged, as would otherwise
	 * happen for input like "unrelated<br>words".
	 *
	 * @param string $tagName HTML tag name
	 */
	private function separateBlock( $tagName ) {
		if ( !$this->isBlockLevelTag( $tagName ) ) {
			return;
		}
		$this->text .= ' ';
	}

	/**
	 * Check whether a tag counts as block-level for the purpose of
	 * whitespace injection. CSS can turn any element into a block, so
	 * this is only a heuristic, but it is better than nothing.
	 *
	 * The name is trimmed and lower-cased before being looked up.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when the tag is listed as an HTML block-level element
	 */
	private function isBlockLevelTag( $tagName ) {
		$normalized = trim( $tagName );
		$normalized = strtolower( $normalized );
		return isset( self::$BLOCK_LEVEL_TAGS[$normalized] );
	}
}