* This is a heavy regex-based removal of whitespace, unnecessary comments and
* tokens. IE conditional comments are preserved. There are also options to have
* STYLE and SCRIPT blocks compressed by callback functions.
+ * https://github.com/mrclay/minify/blob/master/min/lib/Minify/HTML.php
*
* A test suite is available.
- * http://code.google.com/p/minify/source/browse/trunk/min/lib/Minify/HTML.php
- *
+ *
* @package Minify
* @author Stephen Clay <steve@mrclay.org>
*/
class Minify_HTML {
+ /**
+ * Whether the HTML comment markers wrapping the content of a SCRIPT block
+ * are stripped. Can be overridden through the 'jsCleanComments' option.
+ *
+ * @var boolean
+ */
+ protected $_jsCleanComments = true;
/**
* "Minify" an HTML page
* @return string
*/
public static function minify($html, $options = array()) {
- $min = new Minify_HTML($html, $options);
+ // instantiate through self so the class name is not hard-coded here
+ $min = new self($html, $options);
return $min->process();
}
* 'jsMinifier' : (optional) callback function to process content of SCRIPT
* elements. Note: the type attribute is ignored.
*
+ * 'jsCleanComments' : (optional boolean) whether to remove the HTML comment
+ * markers at the beginning and end of each SCRIPT block. Defaults to true.
+ *
* 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If
* unset, minify will sniff for an XHTML doctype.
*
if (isset($options['jsMinifier'])) {
$this->_jsMinifier = $options['jsMinifier'];
}
+ if (isset($options['jsCleanComments'])) {
+ $this->_jsCleanComments = (bool)$options['jsCleanComments'];
+ }
}
// replace SCRIPTs (and minify) with placeholders
$this->_html = preg_replace_callback(
- '/(\\s*)(<script\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i'
+ '/(\\s*)<script(\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i'
,array($this, '_removeScriptCB')
,$this->_html);
// replace STYLEs (and minify) with placeholders
$this->_html = preg_replace_callback(
- '/\\s*(<style\\b[^>]*?>)([\\s\\S]*?)<\\/style>\\s*/i'
+ '/\\s*<style(\\b[^>]*>)([\\s\\S]*?)<\\/style>\\s*/i'
,array($this, '_removeStyleCB')
,$this->_html);
,$this->_html);
// replace PREs with placeholders
- $this->_html = preg_replace_callback('/\\s*(<pre\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i'
+ $this->_html = preg_replace_callback('/\\s*<pre(\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i'
,array($this, '_removePreCB')
,$this->_html);
// replace TEXTAREAs with placeholders
$this->_html = preg_replace_callback(
- '/\\s*(<textarea\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i'
+ '/\\s*<textarea(\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i'
,array($this, '_removeTextareaCB')
,$this->_html);
+ // replace INPUTs with placeholders. The whitespace around the tag is left
+ // in place on purpose: INPUT is an inline element, so swallowing adjacent
+ // whitespace (e.g. "Yes <input> No") would change how the page renders.
+ $this->_html = preg_replace_callback(
+ '/<input(\\b[^>]*?>)/i'
+ ,array($this, '_removeInputCB')
+ ,$this->_html);
+
// trim each line.
// @todo take into account attribute values that span multiple lines.
// 2 regexp because merging un /^\\s+|\\s+$/m also del a lot of newline chars ???
.'|ul)\\b[^>]*>)/i', '$1', $this->_html);
// remove ws outside of all elements
- $this->_html = preg_replace_callback(
- '/>([^<]+)</'
- ,array($this, '_outsideTagCB')
+ // Keep only the first whitespace character of the run on each side of a
+ // text node. Each capture group closes right after its "\s" so the rest of
+ // the run falls outside the capture and is dropped; the text itself is
+ // matched lazily so trailing whitespace reaches the third group.
+ $this->_html = preg_replace(
+ '/>(?:(\\s)\\s*)?([^<]*?)(?:(\\s)\\s*)?</'
+ ,'>$1$2$3<'
,$this->_html);
// use newlines before 1st attribute in open tags (to limit line lengths)
,array_values($this->_placeholders)
,$this->_html
);
+ // issue 229: multi-pass to catch scripts that didn't get replaced in textareas
+ $this->_html = str_replace(
+ array_keys($this->_placeholders)
+ ,array_values($this->_placeholders)
+ ,$this->_html
+ );
return $this->_html;
}
protected $_cssMinifier = null;
protected $_jsMinifier = null;
- protected function _outsideTagCB($m)
- {
- return '>' . preg_replace('/^\\s+|\\s+$/', ' ', $m[1]) . '<';
- }
-
+ /**
+ * Callback for the PRE pattern. The pattern keeps the literal "<pre" outside
+ * its capture group, so it is put back before the element is handed to
+ * _reservePlace().
+ *
+ * @param array $m regex match; $m[1] is everything after "<pre" up to and
+ * including "</pre>"
+ * @return string the value returned by _reservePlace()
+ */
protected function _removePreCB($m)
{
- return $this->_reservePlace($m[1]);
+ return $this->_reservePlace('<pre' . $m[1]);
}
+ /**
+ * Callback for the INPUT pattern. The pattern keeps the literal "<input"
+ * outside its capture group, so it is put back before the tag is handed to
+ * _reservePlace().
+ *
+ * @param array $m regex match; $m[1] is the rest of the tag after "<input"
+ * @return string the value returned by _reservePlace()
+ */
+ protected function _removeInputCB($m)
+ {
+ $inputTag = '<input' . $m[1];
+ return $this->_reservePlace($inputTag);
+ }
+
+ /**
+ * Callback for the TEXTAREA pattern. The pattern keeps the literal
+ * "<textarea" outside its capture group, so it is put back before the
+ * element is handed to _reservePlace().
+ *
+ * @param array $m regex match; $m[1] is everything after "<textarea" up to
+ * and including "</textarea>"
+ * @return string the value returned by _reservePlace()
+ */
protected function _removeTextareaCB($m)
{
- return $this->_reservePlace($m[1]);
+ return $this->_reservePlace('<textarea' . $m[1]);
}
protected function _removeStyleCB($m)
{
- $openStyle = $m[1];
+ $openStyle = "<style{$m[1]}";
$css = $m[2];
// remove HTML comments
$css = preg_replace('/(?:^\\s*<!--|-->\\s*$)/', '', $css);
protected function _removeScriptCB($m)
{
- $openScript = $m[2];
+ $openScript = "<script{$m[2]}";
$js = $m[3];
// whitespace surrounding? preserve at least one space
$ws2 = ($m[4] === '') ? '' : ' ';
// remove HTML comments (and ending "//" if present)
- $js = preg_replace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js);
+ if ($this->_jsCleanComments) {
+ $js = preg_replace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js);
+ }
// remove CDATA section markers
$js = $this->_removeCdata($js);
{
return ($this->_isXhtml && preg_match('/(?:[<&]|\\-\\-|\\]\\]>)/', $str));
}
-}
+}
\ No newline at end of file