Handle one part of bug 32545 while improving MediaWiki's support for Microdata in...
authorDaniel Friesen <dantman@users.mediawiki.org>
Sun, 19 Feb 2012 21:43:37 +0000 (21:43 +0000)
committerDaniel Friesen <dantman@users.mediawiki.org>
Sun, 19 Feb 2012 21:43:37 +0000 (21:43 +0000)
RELEASE-NOTES-1.20
includes/Sanitizer.php
tests/parser/parserTests.txt

index 65317f3..f8b1c2c 100644 (file)
@@ -22,6 +22,8 @@ production.
 * (bug 34475) Add support for IP/CIDR notation to tablesorter
 * (bug 27619) Remove preference option to display broken links as link?
 * (bug 15404) Add support for sorting fractions in jquery.tablesorter
+* The <data>, <time>, <meta>, and <link> elements are allowed within WikiText for use
+  with Microdata.
 
 === Bug fixes in 1.20 ===
 * (bug 30245) Use the correct way to construct a log page title.
index 196abd9..cc71902 100644 (file)
@@ -364,7 +364,7 @@ class Sanitizer {
         * @return string
         */
        static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
-               global $wgUseTidy;
+               global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes;
 
                static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
                        $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
@@ -381,12 +381,19 @@ class Sanitizer {
                                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
                                'kbd', 'samp'
                        );
+                       if ( $wgHtml5 ) {
+                               $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time' ) );
+                       }
                        $htmlsingle = array(
                                'br', 'hr', 'li', 'dt', 'dd'
                        );
                        $htmlsingleonly = array( # Elements that cannot have close tags
                                'br', 'hr'
                        );
+                       if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
+                               $htmlsingle[] = $htmlsingleonly[] = 'meta';
+                               $htmlsingle[] = $htmlsingleonly[] = 'link';
+                       }
                        $htmlnest = array( # Tags that can be nested--??
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                                'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
@@ -528,6 +535,10 @@ class Sanitizer {
                                                        call_user_func_array( $processCallback, array( &$params, $args ) );
                                                }
 
+                                               if ( !Sanitizer::validateTag( $params, $t ) ) {
+                                                       $badtag = true;
+                                               }
+
                                                # Strip non-approved attributes from the tag
                                                $newparams = Sanitizer::fixTagAttributes( $params, $t );
                                        }
@@ -708,6 +719,37 @@ class Sanitizer {
                return $attribs;
        }
 
+       /**
+        * Takes attribute names and values for a tag and the tah name and
+        * validates that the tag is allowed to be present.
+        * This DOES NOT validate the attributes, nor does it validate the
+        * tags themselves. This method only handles the special circumstances
+        * where we may want to allow a tag within content but ONLY when it has
+        * specific attributes set.
+        *
+        * @param $
+        */
+       static function validateTag( $params, $element ) {
+               $params = Sanitizer::decodeTagAttributes( $params );
+               
+               if ( $element == 'meta' || $element == 'link' ) {
+                       if ( !isset( $params['itemprop'] ) ) {
+                               // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
+                               return false;
+                       }
+                       if ( $element == 'meta' && !isset( $params['content'] ) ) {
+                               // <meta> must have a content="" for the itemprop
+                               return false;
+                       }
+                       if ( $element == 'link' && !isset( $params['href'] ) ) {
+                               // <link> must have an associated href=""
+                               return false;
+                       }
+               }
+
+               return true;
+       }
+
        /**
         * Take an array of attribute names and values and normalize or discard
         * illegal values for the given element type.
@@ -809,7 +851,7 @@ class Sanitizer {
                                unset( $out['itemid'] );
                                unset( $out['itemref'] );
                        }
-                       # TODO: Strip itemprop if we aren't descendants of an itemscope.
+                       # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
                }
                return $out;
        }
@@ -1483,7 +1525,7 @@ class Sanitizer {
 
                # Numbers refer to sections in HTML 4.01 standard describing the element.
                # See: http://www.w3.org/TR/html4/
-               $whitelist = array (
+               $whitelist = array(
                        # 7.5.4
                        'div'        => $block,
                        'center'     => $common, # deprecated
@@ -1611,7 +1653,24 @@ class Sanitizer {
                        # 'title' may not be 100% valid here; it's XHTML
                        # http://www.w3.org/TR/REC-MathML/
                        'math'       => array( 'class', 'style', 'id', 'title' ),
+               );
+               
+               if ( $wgHtml5 ) {
+                       # HTML5 elements, defined by:
+                       # http://www.whatwg.org/specs/web-apps/current-work/multipage/
+                       $whitelist += array(
+                               'data' => array_merge( $common, array( 'value' ) ),
+                               'time' => array_merge( $common, array( 'datetime' ) ),
+
+                               // meta and link are only present when Microdata is allowed anyways
+                               // so we don't bother adding another condition here
+                               // meta and link are only valid for use as Microdata so we do not
+                               // allow the common attributes here.
+                               'meta' => array( 'itemprop', 'content' ),
+                               'link' => array( 'itemprop', 'href' ),
                        );
+               }
+
                return $whitelist;
        }
 
index 6f5702d..22970a1 100644 (file)
@@ -5419,6 +5419,26 @@ disabled
 Something need to be done. foo-2 ? 
 !! end
 
+!! test
+Sanitizer: Validating that <meta> and <link> work, but only for Microdata
+!! input
+<div itemscope>
+       <meta itemprop="hello" content="world">
+       <meta http-equiv="refresh" content="5">
+       <link itemprop="hello" href="{{SERVER}}">
+       <link rel="stylesheet" href="{{SERVER}}">
+</div>
+!! result
+<div itemscope="itemscope">
+<p>    <meta itemprop="hello" content="world" />
+       &lt;meta http-equiv="refresh" content="5"&gt;
+</p>
+       <link itemprop="hello" href="http&#58;//Britney-Spears" />
+       &lt;link rel="stylesheet" href="<a rel="nofollow" class="external free" href="http://Britney-Spears">http://Britney-Spears</a>"&gt;
+</div>
+
+!! end
+
 !! test
 Language converter: output gets cut off unexpectedly (bug 5757)
 !! options