Revert Microdata improvements in r111891, r111898, r111899, r111901, r111903, and...
author Daniel Friesen <dantman@users.mediawiki.org>
Mon, 20 Feb 2012 22:32:18 +0000 (22:32 +0000)
committer Daniel Friesen <dantman@users.mediawiki.org>
Mon, 20 Feb 2012 22:32:18 +0000 (22:32 +0000)
RELEASE-NOTES-1.20
includes/Sanitizer.php
includes/parser/Tidy.php
includes/tidy.conf
tests/parser/parserTests.txt

index f8b1c2c..65317f3 100644 (file)
@@ -22,8 +22,6 @@ production.
 * (bug 34475) Add support for IP/CIDR notation to tablesorter
 * (bug 27619) Remove preference option to display broken links as link?
 * (bug 15404) Add support for sorting fractions in jquery.tablesorter
-* The <data>, <time>, <meta>, and <link> elements are allowed within WikiText for use
-  with Microdata.
 
 === Bug fixes in 1.20 ===
 * (bug 30245) Use the correct way to construct a log page title.
index eb87108..196abd9 100644 (file)
@@ -364,17 +364,14 @@ class Sanitizer {
         * @return string
         */
        static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
-               global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes, $wgAllowImageTag;
+               global $wgUseTidy;
 
                static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
                        $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
 
                wfProfileIn( __METHOD__ );
 
-               // Base our staticInitialised variable off of the global config state so that if the globals
-               // are changed (like in the secrewed up test system) we will re-initialise the settings.
-               $globalContext = implode( '-', compact( 'wgHtml5', 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
-               if ( !$staticInitialised || $staticInitialised != $globalContext ) {
+               if ( !$staticInitialised ) {
 
                        $htmlpairsStatic = array( # Tags that must be closed
                                'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
@@ -384,19 +381,12 @@ class Sanitizer {
                                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
                                'kbd', 'samp'
                        );
-                       if ( $wgHtml5 ) {
-                               $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time' ) );
-                       }
                        $htmlsingle = array(
                                'br', 'hr', 'li', 'dt', 'dd'
                        );
                        $htmlsingleonly = array( # Elements that cannot have close tags
                                'br', 'hr'
                        );
-                       if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
-                               $htmlsingle[] = $htmlsingleonly[] = 'meta';
-                               $htmlsingle[] = $htmlsingleonly[] = 'link';
-                       }
                        $htmlnest = array( # Tags that can be nested--??
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                                'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
@@ -411,6 +401,7 @@ class Sanitizer {
                                'li',
                        );
 
+                       global $wgAllowImageTag;
                        if ( $wgAllowImageTag ) {
                                $htmlsingle[] = 'img';
                                $htmlsingleonly[] = 'img';
@@ -425,7 +416,7 @@ class Sanitizer {
                        foreach ( $vars as $var ) {
                                $$var = array_flip( $$var );
                        }
-                       $staticInitialised = $globalContext;
+                       $staticInitialised = true;
                }
                # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
                $extratags = array_flip( $extratags );
@@ -537,10 +528,6 @@ class Sanitizer {
                                                        call_user_func_array( $processCallback, array( &$params, $args ) );
                                                }
 
-                                               if ( !Sanitizer::validateTag( $params, $t ) ) {
-                                                       $badtag = true;
-                                               }
-
                                                # Strip non-approved attributes from the tag
                                                $newparams = Sanitizer::fixTagAttributes( $params, $t );
                                        }
@@ -564,24 +551,16 @@ class Sanitizer {
                                preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
                                $x, $regs );
                                @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
-                               $badtag = false;
                                if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
                                        if( is_callable( $processCallback ) ) {
                                                call_user_func_array( $processCallback, array( &$params, $args ) );
                                        }
-
-                                       if ( !Sanitizer::validateTag( $params, $t ) ) {
-                                               $badtag = true;
-                                       }
-
                                        $newparams = Sanitizer::fixTagAttributes( $params, $t );
-                                       if ( !$badtag ) {
-                                               $rest = str_replace( '>', '&gt;', $rest );
-                                               $text .= "<$slash$t$newparams$brace$rest";
-                                               continue;
-                                       }
+                                       $rest = str_replace( '>', '&gt;', $rest );
+                                       $text .= "<$slash$t$newparams$brace$rest";
+                               } else {
+                                       $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                                }
-                               $text .= '&lt;' . str_replace( '>', '&gt;', $x);
                        }
                }
                wfProfileOut( __METHOD__ );
@@ -729,37 +708,6 @@ class Sanitizer {
                return $attribs;
        }
 
-       /**
-        * Takes attribute names and values for a tag and the tah name and
-        * validates that the tag is allowed to be present.
-        * This DOES NOT validate the attributes, nor does it validate the
-        * tags themselves. This method only handles the special circumstances
-        * where we may want to allow a tag within content but ONLY when it has
-        * specific attributes set.
-        *
-        * @param $
-        */
-       static function validateTag( $params, $element ) {
-               $params = Sanitizer::decodeTagAttributes( $params );
-               
-               if ( $element == 'meta' || $element == 'link' ) {
-                       if ( !isset( $params['itemprop'] ) ) {
-                               // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
-                               return false;
-                       }
-                       if ( $element == 'meta' && !isset( $params['content'] ) ) {
-                               // <meta> must have a content="" for the itemprop
-                               return false;
-                       }
-                       if ( $element == 'link' && !isset( $params['href'] ) ) {
-                               // <link> must have an associated href=""
-                               return false;
-                       }
-               }
-
-               return true;
-       }
-
        /**
         * Take an array of attribute names and values and normalize or discard
         * illegal values for the given element type.
@@ -861,7 +809,7 @@ class Sanitizer {
                                unset( $out['itemid'] );
                                unset( $out['itemref'] );
                        }
-                       # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
+                       # TODO: Strip itemprop if we aren't descendants of an itemscope.
                }
                return $out;
        }
@@ -1486,7 +1434,10 @@ class Sanitizer {
         * @return Array
         */
        static function attributeWhitelist( $element ) {
-               $list = Sanitizer::setupAttributeWhitelist();
+               static $list;
+               if( !isset( $list ) ) {
+                       $list = Sanitizer::setupAttributeWhitelist();
+               }
                return isset( $list[$element] )
                        ? $list[$element]
                        : array();
@@ -1500,13 +1451,6 @@ class Sanitizer {
        static function setupAttributeWhitelist() {
                global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes;
 
-               static $whitelist, $staticInitialised;
-               $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgHtml5', 'wgAllowMicrodataAttributes' ) );
-
-               if ( isset( $whitelist ) && $staticInitialised == $globalContext ) {
-                       return $whitelist;
-               }
-
                $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
 
                if ( $wgAllowRdfaAttributes ) {
@@ -1539,7 +1483,7 @@ class Sanitizer {
 
                # Numbers refer to sections in HTML 4.01 standard describing the element.
                # See: http://www.w3.org/TR/html4/
-               $whitelist = array(
+               $whitelist = array (
                        # 7.5.4
                        'div'        => $block,
                        'center'     => $common, # deprecated
@@ -1667,26 +1611,7 @@ class Sanitizer {
                        # 'title' may not be 100% valid here; it's XHTML
                        # http://www.w3.org/TR/REC-MathML/
                        'math'       => array( 'class', 'style', 'id', 'title' ),
-               );
-               
-               if ( $wgHtml5 ) {
-                       # HTML5 elements, defined by:
-                       # http://www.whatwg.org/specs/web-apps/current-work/multipage/
-                       $whitelist += array(
-                               'data' => array_merge( $common, array( 'value' ) ),
-                               'time' => array_merge( $common, array( 'datetime' ) ),
-
-                               // meta and link are only present when Microdata is allowed anyways
-                               // so we don't bother adding another condition here
-                               // meta and link are only valid for use as Microdata so we do not
-                               // allow the common attributes here.
-                               'meta' => array( 'itemprop', 'content' ),
-                               'link' => array( 'itemprop', 'href' ),
                        );
-               }
-
-               $staticInitialised = $globalContext;
-
                return $whitelist;
        }
 
index 2ae9e99..8bd80b2 100644 (file)
@@ -41,15 +41,9 @@ class MWTidyWrapper {
                        dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
                $this->mMarkerIndex = 0;
 
-               // Replace <mw:editsection> elements with placeholders
                $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
                        array( &$this, 'replaceEditSectionLinksCallback' ), $text );
 
-               // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
-               // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
-               $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
-
-               // Wrap the whole thing in a doctype and body for Tidy.
                $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
                        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
                        '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>';
@@ -74,13 +68,7 @@ class MWTidyWrapper {
         * @return string
         */
        public function postprocess( $text ) {
-               // Revert <html-{link,meta}> back to <{link,meta}>
-               $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
-
-               // Restore the contents of placeholder tokens
-               $text = $this->mTokens->replace( $text );
-
-               return $text;
+               return $this->mTokens->replace( $text );
        }
 
 }
index 6ae8d45..09412f0 100644 (file)
@@ -16,8 +16,3 @@ quiet: yes
 quote-nbsp: yes
 fix-backslash: no
 fix-uri: no
-
-# Don't strip html5 elements we support
-# html-{meta,link} is a hack we use to prevent Tidy from stripping <meta> and <link> used in the body for Microdata
-new-empty-tags: html-meta, html-link
-new-inline-tags: data, time
index 8535d78..6f5702d 100644 (file)
@@ -5419,30 +5419,6 @@ disabled
 Something need to be done. foo-2 ? 
 !! end
 
-!! test
-Sanitizer: Validating that <meta> and <link> work, but only for Microdata
-!! input
-<div itemscope>
-       <meta itemprop="hello" content="world">
-       <meta http-equiv="refresh" content="5">
-       <meta itemprop="hello" http-equiv="refresh" content="5">
-       <link itemprop="hello" href="{{SERVER}}">
-       <link rel="stylesheet" href="{{SERVER}}">
-       <link rel="stylesheet" itemprop="hello" href="{{SERVER}}">
-</div>
-!! result
-<div itemscope="itemscope">
-<p>    <meta itemprop="hello" content="world" />
-       &lt;meta http-equiv="refresh" content="5"&gt;
-       <meta itemprop="hello" content="5" />
-</p>
-       <link itemprop="hello" href="http&#58;//Britney-Spears" />
-       &lt;link rel="stylesheet" href="<a rel="nofollow" class="external free" href="http://Britney-Spears">http://Britney-Spears</a>"&gt;
-       <link itemprop="hello" href="http&#58;//Britney-Spears" />
-</div>
-
-!! end
-
 !! test
 Language converter: output gets cut off unexpectedly (bug 5757)
 !! options