3 use RemexHtml\Tokenizer\Attributes
;
4 use RemexHtml\Tokenizer\TokenHandler
;
5 use RemexHtml\Tokenizer\Tokenizer
;
10 class RemexStripTagHandler
implements TokenHandler
{
12 public function getResult() {
16 function startDocument( Tokenizer
$t, $fns, $fn ) {
19 function endDocument( $pos ) {
22 function error( $text, $pos ) {
25 function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
26 $this->text
.= substr( $text, $start, $length );
28 function startTag( $name, Attributes
$attrs, $selfClose, $sourceStart, $sourceLength ) {
29 // Inject whitespace for typical block-level tags to
30 // prevent merging unrelated<br>words.
31 if ( $this->isBlockLevelTag( $name ) ) {
35 function endTag( $name, $sourceStart, $sourceLength ) {
36 // Inject whitespace for typical block-level tags to
37 // prevent merging unrelated<br>words.
38 if ( $this->isBlockLevelTag( $name ) ) {
42 function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
45 function comment( $text, $sourceStart, $sourceLength ) {
49 // Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
50 // retrieved on Sept 12, 2018. <br> is not block-level but was added anyway.
51 // The following is a complete list of all HTML block level elements
52 // (although "block-level" is not technically defined for elements that are
54 // Structured as tag => true to allow O(1) membership test.
55 private static $BLOCK_LEVEL_TAGS = [
99 * Detect block-level tags. Of course CSS can make anything a block-level
100 * tag, but this is still better than nothing.
102 * @param string $tagName HTML tag name
103 * @return bool True when the tag is an HTML block-level element
105 private function isBlockLevelTag( $tagName ) {
106 $key = strtolower( trim( $tagName ) );
107 return isset( self
::$BLOCK_LEVEL_TAGS[$key] );