API: (bug 19004) Add support for tags. Patch by Matthew Britton
[lhc/web/wiklou.git] / includes / Html.php
index ca76c67..9573269 100644 (file)
@@ -99,15 +99,63 @@ class Html {
         *
         * @param $element  string The element's name, e.g., 'a'
         * @param $attribs  array  Associative array of attributes, e.g., array(
-        *   'href' => 'http://www.mediawiki.org/' ).  Values will be HTML-escaped.
+        *   'href' => 'http://www.mediawiki.org/' ).  See expandAttributes() for
+        *   further documentation.
         * @param $contents string The raw HTML contents of the element: *not*
         *   escaped!
         * @return string Raw HTML
         */
        public static function rawElement( $element, $attribs = array(), $contents = '' ) {
-               global $wgWellFormedXml;
+               global $wgHtml5, $wgWellFormedXml;
+               $attribs = (array)$attribs;
+               # This is not required in HTML 5, but let's do it anyway, for
+               # consistency and better compression.
                $element = strtolower( $element );
-               $start = "<$element" . self::expandAttributes( $attribs );
+
+               # Element-specific hacks to slim down output and ensure validity
+               if ( $element == 'input' ) {
+                       if ( !$wgHtml5 ) {
+                               # With $wgHtml5 off we want to validate as XHTML 1, so we
+                               # strip out any fancy HTML 5-only input types for now.
+                               #
+                               # Whitelist of valid types:
+                               $validTypes = array(
+                                       'hidden',
+                                       'text',
+                                       'password',
+                                       'checkbox',
+                                       'radio',
+                                       'file',
+                                       'submit',
+                                       'image',
+                                       'reset',
+                                       'button',
+                               );
+                               if ( isset( $attribs['type'] )
+                               && !in_array( $attribs['type'], $validTypes ) ) {
+                                       # Fall back to type=text, the default
+                                       unset( $attribs['type'] );
+                               }
+                               # Here we're blacklisting some HTML5-only attributes...
+                               $html5attribs = array(
+                                       'autocomplete',
+                                       'autofocus',
+                                       'max',
+                                       'min',
+                                       'multiple',
+                                       'pattern',
+                                       'placeholder',
+                                       'required',
+                                       'step',
+                               );
+                               foreach ( $html5attribs as $badAttr ) {
+                                       unset( $attribs[$badAttr] );
+                               }
+                       }
+               }
+
+               $start = "<$element" . self::expandAttributes(
+                       self::dropDefaults( $element, $attribs ) );
                if ( in_array( $element, self::$voidElements ) ) {
                        if ( $wgWellFormedXml ) {
                                return "$start />";
@@ -131,6 +179,107 @@ class Html {
                ) ) );
        }
 
+       /**
+        * Given an element name and an associative array of element attributes,
+        * return an array that is functionally identical to the input array, but
+        * possibly smaller.  In particular, attributes might be stripped if they
+        * are given their default values.
+        *
+        * This method is not guaranteed to remove all redundant attributes, only
+        * some common ones and some others selected arbitrarily at random.  It
+        * only guarantees that the output array should be functionally identical
+        * to the input array (currently per the HTML 5 draft as of 2009-09-06).
+        *
+        * @param $element string Name of the element, e.g., 'a'
+        * @param $attribs array  Associative array of attributes, e.g., array(
+        *   'href' => 'http://www.mediawiki.org/' ).  See expandAttributes() for
+        *   further documentation.
+        * @return array An array of attributes functionally identical to $attribs
+        */
+       private static function dropDefaults( $element, $attribs ) {
+               # Don't bother doing anything if we aren't outputting HTML5; it's too
+               # much of a pain to maintain two sets of defaults.
+               global $wgHtml5;
+               if ( !$wgHtml5 ) {
+                       return $attribs;
+               }
+
+               # Attributes whose default is a fixed string.  GET is the default of
+               # (form)method, not of (form)action, which holds a URL and must be kept.
+               static $attribDefaults = array(
+                       'area' => array( 'shape' => 'rect' ),
+                       'button' => array(
+                               'formenctype' => 'application/x-www-form-urlencoded',
+                               'formmethod' => 'get',
+                               'type' => 'submit',
+                       ),
+                       'canvas' => array( 'height' => '150', 'width' => '300' ),
+                       'command' => array( 'type' => 'command' ),
+                       'form' => array(
+                               'autocomplete' => 'on',
+                               'enctype' => 'application/x-www-form-urlencoded',
+                               'method' => 'get',
+                       ),
+                       # An input's default value depends on its type; handled further down.
+                       'input' => array( 'formmethod' => 'get', 'type' => 'text' ),
+                       'keygen' => array( 'keytype' => 'rsa' ),
+                       'link' => array( 'media' => 'all' ),
+                       'menu' => array( 'type' => 'list' ),
+                       # Note: the use of text/javascript here instead of other JavaScript
+                       # MIME types follows the HTML 5 spec.
+                       'script' => array( 'type' => 'text/javascript' ),
+                       'style' => array( 'media' => 'all', 'type' => 'text/css' ),
+                       'textarea' => array( 'wrap' => 'soft' ),
+               );
+
+               $element = strtolower( $element );
+
+               foreach ( $attribs as $attrib => $value ) {
+                       $lcattrib = strtolower( $attrib );
+                       $value = strval( $value );
+
+                       # Simple checks using $attribDefaults.  Both sides are strings, so
+                       # compare strictly: '1.5e2' must not be taken for the default '150'.
+                       if ( isset( $attribDefaults[$element][$lcattrib] ) &&
+                       $attribDefaults[$element][$lcattrib] === $value ) {
+                               unset( $attribs[$attrib] );
+                       }
+
+                       if ( $lcattrib == 'class' && $value == '' ) {
+                               unset( $attribs[$attrib] );
+                       }
+               }
+
+               # More subtle checks
+               if ( $element === 'link' && isset( $attribs['type'] )
+               && strval( $attribs['type'] ) == 'text/css' ) {
+                       unset( $attribs['type'] );
+               }
+
+               if ( $element === 'input' && array_key_exists( 'value', $attribs )
+               && strval( $attribs['value'] ) === '' ) {
+                       # value="" is redundant only where the default value is empty.
+                       # Checkboxes and radio buttons default to "on", and submit and
+                       # reset buttons get a browser-supplied label when value is absent.
+                       $type = isset( $attribs['type'] ) ? strtolower( strval( $attribs['type'] ) ) : 'text';
+                       if ( !in_array( $type, array( 'checkbox', 'radio', 'submit', 'reset' ), true ) ) {
+                               unset( $attribs['value'] );
+                       }
+               }
+
+               if ( $element === 'select' && isset( $attribs['size'] ) ) {
+                       # Search strictly: a loose in_array() would also match 0 or true.
+                       $isMulti = in_array( 'multiple', $attribs, true )
+                               || ( isset( $attribs['multiple'] ) && $attribs['multiple'] !== false );
+                       # A multi-select defaults to size 4, a single select to size 1
+                       if ( strval( $attribs['size'] ) === ( $isMulti ? '4' : '1' ) ) {
+                               unset( $attribs['size'] );
+                       }
+               }
+
+               return $attribs;
+       }
+
        /**
         * Given an associative array of element attributes, generate a string
         * to stick after the element name in HTML output.  Like array( 'href' =>
@@ -142,6 +291,9 @@ class Html {
         *
         * @param $attribs array Associative array of attributes, e.g., array(
         *   'href' => 'http://www.mediawiki.org/' ).  Values will be HTML-escaped.
+        *   A value of false means to omit the attribute.  For boolean attributes,
+        *   you can omit the key, e.g., array( 'checked' ) instead of
+        *   array( 'checked' => 'checked' ) or such.
         * @return string HTML fragment that goes between element name and '>'
         *   (starting with a space if at least one attribute is output)
         */
@@ -149,14 +301,36 @@ class Html {
                global $wgHtml5, $wgWellFormedXml;
 
                $ret = '';
+               $attribs = (array)$attribs;
                foreach ( $attribs as $key => $value ) {
+                       if ( $value === false ) {
+                               continue;
+                       }
+
+                       # For boolean attributes, support array( 'foo' ) instead of
+                       # requiring array( 'foo' => 'meaningless' ).
+                       if ( is_int( $key )
+                       && in_array( strtolower( $value ), self::$boolAttribs ) ) {
+                               $key = $value;
+                       }
+
+                       # Not technically required in HTML 5, but required in XHTML 1.0,
+                       # and we'd like consistency and better compression anyway.
+                       $key = strtolower( $key );
+
                        # See the "Attributes" section in the HTML syntax part of HTML 5,
                        # 9.1.2.3 as of 2009-08-10.  Most attributes can have quotation
                        # marks omitted, but not all.  (Although a literal " is not
                        # permitted, we don't check for that, since it will be escaped
                        # anyway.)
-                       if ( $wgWellFormedXml || $value == ''
-                       || preg_match( "/[ '=<>]/", $value ) ) {
+                       #
+                       # See also research done on further characters that need to be
+                       # escaped: http://code.google.com/p/html5lib/issues/detail?id=93
+                       $badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
+                               . "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
+                               . "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";
+                       if ( $wgWellFormedXml || $value === ''
+                       || preg_match( "![$badChars]!u", $value ) ) {
                                $quote = '"';
                        } else {
                                $quote = '';
@@ -179,13 +353,23 @@ class Html {
                                # and we don't need <> escaped here, we may as well not call
                                # htmlspecialchars().  FIXME: verify that we actually need to
                                # escape \n\r\t here, and explain why, exactly.
-                               $ret .= " $key=$quote" . strtr( $value, array(
+                               #
+                               # We could call Sanitizer::encodeAttribute() for this, but we
+                               # don't because we're stubborn and like our marginal savings on
+                               # byte size from not having to encode unnecessary quotes.
+                               $map = array(
                                        '&' => '&amp;',
                                        '"' => '&quot;',
                                        "\n" => '&#10;',
                                        "\r" => '&#13;',
                                        "\t" => '&#9;'
-                               ) ) . $quote;
+                               );
+                               if ( $wgWellFormedXml ) {
+                                       # '<' must be escaped in attributes for XML for some
+                                       # reason, per spec: http://www.w3.org/TR/xml/#NT-AttValue
+                                       $map['<'] = '&lt;';
+                               }
+                               $ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
                        }
                }
                return $ret;
@@ -200,11 +384,13 @@ class Html {
         * @return string Raw HTML
         */
        public static function inlineScript( $contents ) {
-               global $wgHtml5, $wgJsMimeType;
+               global $wgHtml5, $wgJsMimeType, $wgWellFormedXml;
 
                $attrs = array();
                if ( !$wgHtml5 ) {
                        $attrs['type'] = $wgJsMimeType;
+               }
+               if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
                        $contents = "/*<![CDATA[*/$contents/*]]>*/";
                }
                return self::rawElement( 'script', $attrs, $contents );
@@ -233,24 +419,19 @@ class Html {
         * contains literal '</style>' (admittedly unlikely).
         *
         * @param $contents string CSS
-        * @param $media mixed A media type string, like 'screen', or null for all
-        *   media
+        * @param $media mixed A media type string, like 'screen'
         * @return string Raw HTML
         */
-       public static function inlineStyle( $contents, $media = null ) {
-               global $wgHtml5;
+       public static function inlineStyle( $contents, $media = 'all' ) {
+               global $wgWellFormedXml;
 
-               $attrs = array();
-               if ( !$wgHtml5 ) {
-                       # Technically we should probably add CDATA stuff here like with
-                       # scripts, but in practice, stylesheets tend not to have
-                       # problematic characters anyway.
-                       $attrs['type'] = 'text/css';
-               }
-               if ( $media !== null ) {
-                       $attrs['media'] = $media;
+               if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
+                       $contents = "/*<![CDATA[*/$contents/*]]>*/";
                }
-               return self::rawElement( 'style', $attrs, $contents );
+               return self::rawElement( 'style', array(
+                       'type' => 'text/css',
+                       'media' => $media,
+               ), $contents );
        }
 
        /**
@@ -258,21 +439,16 @@ class Html {
         * media type (if any).
         *
         * @param $url string
-        * @param $media mixed A media type string, like 'screen', or null for all
-        *   media
+        * @param $media mixed A media type string, like 'screen'
         * @return string Raw HTML
         */
-       public static function linkedStyle( $url, $media = null ) {
-               global $wgHtml5;
-
-               $attrs = array( 'rel' => 'stylesheet', 'href' => $url );
-               if ( !$wgHtml5 ) {
-                       $attrs['type'] = 'text/css';
-               }
-               if ( $media !== null ) {
-                       $attrs['media'] = $media;
-               }
-               return self::element( 'link', $attrs );
+       public static function linkedStyle( $url, $media = 'all' ) {
+               return self::element( 'link', array(
+                       'rel' => 'stylesheet',
+                       'href' => $url,
+                       'type' => 'text/css',
+                       'media' => $media,
+               ) );
        }
 
        /**
@@ -281,59 +457,31 @@ class Html {
         * $wgHtml5 is false.
         *
         * @param $name    string name attribute
-        * @param $value   mixed  value attribute (null = omit)
+        * @param $value   mixed  value attribute
         * @param $type    string type attribute
-        * @param $attribs array  Assocative array of miscellaneous extra attributes,
-        *   passed to Html::element()
+        * @param $attribs array  Associative array of miscellaneous extra
+        *   attributes, passed to Html::element()
         * @return string Raw HTML
         */
-       public static function input( $name, $value = null, $type = 'text', $attribs = array() ) {
-               global $wgHtml5;
-
-               if ( !$wgHtml5 ) {
-                       // With $wgHtml5 off we want to validate as XHTML 1, so we
-                       // strip out any fancy HTML 5-only input types for now.
-                       //
-                       // Whitelist of valid types:
-                       $validTypes = array(
-                               'hidden',
-                               'text',
-                               'password',
-                               'checkbox',
-                               'radio',
-                               'file',
-                               'submit',
-                               'image',
-                               'reset',
-                               'button',
-                       );
-                       if ( !in_array( $type, $validTypes ) ) {
-                               $type = 'text';
-                       }
-                       // Here we're blacklisting some HTML5-only attributes...
-                       $html5attribs = array(
-                               'autocomplete',
-                               'autofocus',
-                               'max',
-                               'min',
-                               'multiple',
-                               'pattern',
-                               'placeholder',
-                               'required',
-                               'step',
-                       );
-                       foreach ( $html5attribs as $badAttr ) {
-                               unset( $attribs[$badAttr] );
-                       }
-               }
-               if ( $type != 'text' ) {
-                       $attribs['type'] = $type;
-               }
-               if ( $value !== null ) {
-                       $attribs['value'] = $value;
-               }
+       public static function input( $name, $value = '', $type = 'text', $attribs = array() ) {
+               $attribs['type'] = $type;
+               $attribs['value'] = $value;
                $attribs['name'] = $name;
 
                return self::element( 'input', $attribs );
        }
+
+       /**
+        * Convenience wrapper around self::input() with $type fixed to 'hidden', like
+        * Xml::hidden.  Any 'type', 'value' or 'name' key in $attribs is overwritten.
+        *
+        * @param $name    string name attribute
+        * @param $value   string value attribute
+        * @param $attribs array  Associative array of miscellaneous extra
+        *   attributes, passed on to self::input()
+        * @return string Raw HTML
+        */
+       public static function hidden( $name, $value, $attribs = array() ) {
+               return self::input( $name, $value, 'hidden', $attribs );
+       }
 }