$this->text .= substr( $text, $start, $length );
}
function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
- // Do nothing.
+ // Inject whitespace for typical block-level tags to
+ // prevent merging unrelated<br>words.
+ if ( $this->isBlockLevelTag( $name ) ) {
+ $this->text .= ' ';
+ }
}
function endTag( $name, $sourceStart, $sourceLength ) {
- // Do nothing.
+ // Inject whitespace for typical block-level tags to
+ // prevent merging unrelated<br>words.
+ if ( $this->isBlockLevelTag( $name ) ) {
+ $this->text .= ' ';
+ }
}
function doctype( $name, $public, $system, $quirks, $sourceStart, $sourceLength ) {
// Do nothing.
function comment( $text, $sourceStart, $sourceLength ) {
// Do nothing.
}
+
+ // Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
+ // retrieved on sept 12, 2018. <br> is not block level but was added anyways.
+ // The following is a complete list of all HTML block level elements
+ // (although "block-level" is not technically defined for elements that are
+ // new in HTML5).
+ // Structured as tag => true to allow O(1) membership test.
+ static private $BLOCK_LEVEL_TAGS = [
+ 'address' => true,
+ 'article' => true,
+ 'aside' => true,
+ 'blockquote' => true,
+ 'br' => true,
+ 'canvas' => true,
+ 'dd' => true,
+ 'div' => true,
+ 'dl' => true,
+ 'dt' => true,
+ 'fieldset' => true,
+ 'figcaption' => true,
+ 'figure' => true,
+ 'footer' => true,
+ 'form' => true,
+ 'h1' => true,
+ 'h2' => true,
+ 'h3' => true,
+ 'h4' => true,
+ 'h5' => true,
+ 'h6' => true,
+ 'header' => true,
+ 'hgroup' => true,
+ 'hr' => true,
+ 'li' => true,
+ 'main' => true,
+ 'nav' => true,
+ 'noscript' => true,
+ 'ol' => true,
+ 'output' => true,
+ 'p' => true,
+ 'pre' => true,
+ 'section' => true,
+ 'table' => true,
+ 'tfoot' => true,
+ 'ul' => true,
+ 'video' => true,
+ ];
+
+ /**
+ * Detect block level tags. Of course css can make anything a block
+ * level tag, but this is still better than nothing.
+ *
+ * @param string $tagName HTML tag name
+ * @return bool True when tag is an html block level element
+ */
+ private function isBlockLevelTag( $tagName ) {
+ $key = strtolower( trim( $tagName ) );
+ return isset( self::$BLOCK_LEVEL_TAGS[$key] );
+ }
}