Strip some microdata attributes when invalid
author Aryeh Gregor <simetrical@users.mediawiki.org>
Mon, 18 Jan 2010 01:30:41 +0000 (01:30 +0000)
committer Aryeh Gregor <simetrical@users.mediawiki.org>
Mon, 18 Jan 2010 01:30:41 +0000 (01:30 +0000)
includes/Sanitizer.php
maintenance/parserTests.txt

index 0487762..adfbd5a 100644 (file)
@@ -620,7 +620,7 @@ class Sanitizer {
         * @todo Check for unique id attribute :P
         */
        static function validateAttributes( $attribs, $whitelist ) {
-               global $wgAllowRdfaAttributes;
+               global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
 
                $whitelist = array_flip( $whitelist );
                $hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
@@ -682,6 +682,29 @@ class Sanitizer {
                        // Output should only have one attribute of each name.
                        $out[$attribute] = $value;
                }
+
+               if ( $wgAllowMicrodataAttributes ) {
+                       # There are some complicated validity constraints we need to
+                       # enforce here.  First of all, we don't want to allow non-standard
+                       # itemtypes.
+                       $allowedTypes = array(
+                               'http://microformats.org/profile/hcard',
+                               'http://microformats.org/profile/hcalendar#vevent',
+                               'http://n.whatwg.org/work',
+                       );
+                       if ( isset( $out['itemtype'] ) && !in_array( $out['itemtype'],
+                       $allowedTypes ) ) {
+                               # Non-standard itemtype: drop itemscope, so that the
+                               # check below also strips itemtype, itemid and itemref.
+                               unset( $out['itemscope'] );
+                       }
+                       # itemtype, itemid, itemref don't make sense without itemscope
+                       if ( !array_key_exists( 'itemscope', $out ) ) {
+                               unset( $out['itemtype'] );
+                               unset( $out['itemid'] );
+                               unset( $out['itemref'] );
+                       }
+                       # TODO: Strip itemprop if we aren't descendants of an itemscope.
+               }
                return $out;
        }
 
index 1283a4b..4b45f58 100644 (file)
@@ -7764,6 +7764,32 @@ license</a>.</small></p>
 
 !! end
 
+!! test
+Microdata: license example from spec with bad itemtype
+!! input
+<div itemscope itemtype="http://nonstandard.invalid/">
+<img itemprop="work" src="mypond.jpeg">
+<p><cite itemprop="title">My Pond</cite></p>
+<p><small>Licensed under the <a itemprop="license"
+href="http://creativecommons.org/licenses/by-sa/3.0/us/">Creative
+Commons Attribution-Share Alike 3.0 United States License</a>
+and the <a itemprop="license"
+href="http://www.opensource.org/licenses/mit-license.php">MIT
+license</a>.</small></p>
+</div>
+!! result
+<div>
+<p>&lt;img itemprop="work" src="mypond.jpeg"&gt;
+</p>
+<p><cite itemprop="title">My Pond</cite></p>
+<p><small>Licensed under the <a href="http://creativecommons.org/licenses/by-sa/3.0/us/" class="external " itemprop="license">Creative
+Commons Attribution-Share Alike 3.0 United States License</a>
+and the <a href="http://www.opensource.org/licenses/mit-license.php" class="external " itemprop="license">MIT
+license</a>.</small></p>
+</div>
+
+!! end
+