3 use RemexHtml\Tokenizer\Attributes
;
4 use RemexHtml\Tokenizer\TokenHandler
;
5 use RemexHtml\Tokenizer\Tokenizer
;
10 class RemexStripTagHandler
implements TokenHandler
{
13 public function getResult() {
17 function startDocument( Tokenizer
$t, $fns, $fn ) {
21 function endDocument( $pos ) {
25 function error( $text, $pos ) {
29 function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
30 $this->text
.= substr( $text, $start, $length );
33 function startTag( $name, Attributes
$attrs, $selfClose, $sourceStart, $sourceLength ) {
34 // Inject whitespace for typical block-level tags to
35 // prevent merging unrelated<br>words.
36 if ( $this->isBlockLevelTag( $name ) ) {
41 function endTag( $name, $sourceStart, $sourceLength ) {
42 // Inject whitespace for typical block-level tags to
43 // prevent merging unrelated<br>words.
44 if ( $this->isBlockLevelTag( $name ) ) {
49 function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
53 function comment( $text, $sourceStart, $sourceLength ) {
57 // Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
58 // retrieved on Sept. 12, 2018. <br> is not block level but was added anyway.
59 // The following is a complete list of all HTML block level elements
60 // (although "block-level" is not technically defined for elements that are
62 // Structured as tag => true to allow O(1) membership test.
63 private static $BLOCK_LEVEL_TAGS = [
107 * Detect block-level tags. Of course, CSS can make anything a
108 * block-level tag, but this is still better than nothing.
110 * @param string $tagName HTML tag name
111 * @return bool True when the tag is an HTML block-level element
113 private function isBlockLevelTag( $tagName ) {
114 $key = strtolower( trim( $tagName ) );
115 return isset( self
::$BLOCK_LEVEL_TAGS[$key] );