X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=0034afefae1531bbcd08be4791a94e31e016587a;hb=6b9ae314fd0e205a1fc1d6a52f962f68b6fb3680;hp=5aa0545ba0b95cea21c8af7c8056dd052fd2f3ff;hpb=16b89ab3e74da48c4fd8bd5607775248af5da0aa;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 5aa0545ba0..0034afefae 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -364,14 +364,17 @@ class Sanitizer { * @return string */ static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) { - global $wgUseTidy; + global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes, $wgAllowImageTag; static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; wfProfileIn( __METHOD__ ); - if ( !$staticInitialised ) { + // Base our staticInitialised variable off of the global config state so that if the globals + // are changed (like in the screwed up test system) we will re-initialise the settings. 
+ $globalContext = implode( '-', compact( 'wgHtml5', 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) ); + if ( !$staticInitialised || $staticInitialised != $globalContext ) { $htmlpairsStatic = array( # Tags that must be closed 'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', @@ -381,13 +384,20 @@ class Sanitizer { 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn', 'kbd', 'samp' ); + if ( $wgHtml5 ) { + $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time', 'mark' ) ); + } $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); $htmlsingleonly = array( # Elements that cannot have close tags 'br', 'hr' ); - $htmlnest = array( # Tags that can be nested directly or indirectly + if ( $wgHtml5 && $wgAllowMicrodataAttributes ) { + $htmlsingle[] = $htmlsingleonly[] = 'meta'; + $htmlsingle[] = $htmlsingleonly[] = 'link'; + } + $htmlnest = array( # Tags that can be nested--?? 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span' ); @@ -401,7 +411,6 @@ class Sanitizer { 'li', ); - global $wgAllowImageTag; if ( $wgAllowImageTag ) { $htmlsingle[] = 'img'; $htmlsingleonly[] = 'img'; @@ -416,7 +425,7 @@ class Sanitizer { foreach ( $vars as $var ) { $$var = array_flip( $$var ); } - $staticInitialised = true; + $staticInitialised = $globalContext; } # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays $extratags = array_flip( $extratags ); @@ -532,6 +541,10 @@ class Sanitizer { call_user_func_array( $processCallback, array( &$params, $args ) ); } + if ( !Sanitizer::validateTag( $params, $t ) ) { + $badtag = true; + } + # Strip non-approved attributes from the tag $newparams = Sanitizer::fixTagAttributes( $params, $t ); } @@ -555,16 +568,24 @@ class Sanitizer { preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs ); @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + $badtag = false; if ( isset( 
$htmlelements[$t = strtolower( $t )] ) ) { if( is_callable( $processCallback ) ) { call_user_func_array( $processCallback, array( &$params, $args ) ); } + + if ( !Sanitizer::validateTag( $params, $t ) ) { + $badtag = true; + } + $newparams = Sanitizer::fixTagAttributes( $params, $t ); - $rest = str_replace( '>', '&gt;', $rest ); - $text .= "<$slash$t$newparams$brace$rest"; - } else { - $text .= '&lt;' . str_replace( '>', '&gt;', $x); + if ( !$badtag ) { + $rest = str_replace( '>', '&gt;', $rest ); + $text .= "<$slash$t$newparams$brace$rest"; + continue; + } } + $text .= '&lt;' . str_replace( '>', '&gt;', $x); } } wfProfileOut( __METHOD__ ); @@ -617,111 +638,35 @@ class Sanitizer { } /** - * Take an array of attribute names and values and fix some deprecated values - * for the given element type. - * This does not validate properties, so you should ensure that you call - * validateTagAttributes AFTER this to ensure that the resulting style rule - * this may add is safe. - * - * - Converts most presentational attributes like align into inline css + * Takes attribute names and values for a tag and the tag name and + * validates that the tag is allowed to be present. + * This DOES NOT validate the attributes, nor does it validate the + * tags themselves. This method only handles the special circumstances + * where we may want to allow a tag within content but ONLY when it has + * specific attributes set. 
* - * @param $attribs Array - * @param $element String - * @return Array + * @param $params + * @param $element */ - static function fixDeprecatedAttributes( $attribs, $element ) { - global $wgHtml5, $wgCleanupPresentationalAttributes; - - // presentational attributes were removed from html5, we can leave them - // in when html5 is turned off - if ( !$wgHtml5 || !$wgCleanupPresentationalAttributes ) { - return $attribs; - } - - $table = array( 'table' ); - $cells = array( 'td', 'th' ); - $colls = array( 'col', 'colgroup' ); - $tblocks = array( 'tbody', 'tfoot', 'thead' ); - $h = array( 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' ); - - $presentationalAttribs = array( - 'align' => array( 'text-align', array_merge( array( 'caption', 'hr', 'div', 'p', 'tr' ), $table, $cells, $colls, $tblocks, $h ) ), - 'clear' => array( 'clear', array( 'br' ) ), - 'height' => array( 'height', $cells ), - 'nowrap' => array( 'white-space', $cells ), - 'size' => array( 'height', array( 'hr' ) ), - 'type' => array( 'list-style-type', array( 'li', 'ol', 'ul' ) ), - 'valign' => array( 'vertical-align', array_merge( $cells, $colls, $tblocks ) ), - 'width' => array( 'width', array_merge( array( 'hr', 'pre' ), $table, $cells, $colls ) ), - ); - - // Ensure that any upper case or mixed case attributes are converted to lowercase - foreach ( $attribs as $attribute => $value ) { - if ( $attribute !== strtolower( $attribute ) && array_key_exists( strtolower( $attribute ), $presentationalAttribs ) ) { - $attribs[strtolower( $attribute )] = $value; - unset( $attribs[$attribute] ); - } - } - - $style = ""; - foreach ( $presentationalAttribs as $attribute => $info ) { - list( $property, $elements ) = $info; + static function validateTag( $params, $element ) { + $params = Sanitizer::decodeTagAttributes( $params ); - // Skip if this attribute is not relevant to this element - if ( !in_array( $element, $elements ) ) { - continue; - } - - // Skip if the attribute is not used - if ( !array_key_exists( $attribute, 
$attribs ) ) { - continue; + if ( $element == 'meta' || $element == 'link' ) { + if ( !isset( $params['itemprop'] ) ) { + // and must have an itemprop="" otherwise they are not valid or safe in content + return false; } - - $value = $attribs[$attribute]; - - // For nowrap the value should be nowrap instead of whatever text is in the value - if ( $attribute === 'nowrap' ) { - $value = 'nowrap'; + if ( $element == 'meta' && !isset( $params['content'] ) ) { + // must have a content="" for the itemprop + return false; } - - // clear="all" is clear: both; in css - if ( $attribute === 'clear' && strtolower( $value ) === 'all' ) { - $value = 'both'; + if ( $element == 'link' && !isset( $params['href'] ) ) { + // must have an associated href="" + return false; } - - // Size based properties should have px applied to them if they have no unit - if ( in_array( $attribute, array( 'height', 'width', 'size' ) ) ) { - if ( preg_match( '/^[\d.]+$/', $value ) ) { - $value = "{$value}px"; - } - } - - // Table align is special, it's about block alignment instead of - // content align (see also bug 40306) - if ( $attribute === 'align' && in_array( $element, $table ) ) { - if ( $value === 'center' ) { - $style .= ' margin-left: auto;'; - $property = 'margin-right'; - $value = 'auto'; - } else { - $property = 'float'; - } - } - - $style .= " $property: $value;"; - - unset( $attribs[$attribute] ); - } - - if ( $style ) { - // Prepend our style rules so that they can be overridden by user css - if ( isset($attribs['style']) ) { - $style .= " " . $attribs['style']; - } - $attribs['style'] = trim($style); } - return $attribs; + return true; } /** @@ -825,7 +770,7 @@ class Sanitizer { unset( $out['itemid'] ); unset( $out['itemref'] ); } - # TODO: Strip itemprop if we aren't descendants of an itemscope. + # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref. 
} return $out; } @@ -972,7 +917,6 @@ class Sanitizer { } $decoded = Sanitizer::decodeTagAttributes( $text ); - $decoded = Sanitizer::fixDeprecatedAttributes( $decoded, $element ); $stripped = Sanitizer::validateTagAttributes( $decoded, $element ); $attribs = array(); @@ -1451,10 +1395,7 @@ class Sanitizer { * @return Array */ static function attributeWhitelist( $element ) { - static $list; - if( !isset( $list ) ) { - $list = Sanitizer::setupAttributeWhitelist(); - } + $list = Sanitizer::setupAttributeWhitelist(); return isset( $list[$element] ) ? $list[$element] : array(); @@ -1468,6 +1409,13 @@ class Sanitizer { static function setupAttributeWhitelist() { global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes; + static $whitelist, $staticInitialised; + $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgHtml5', 'wgAllowMicrodataAttributes' ) ); + + if ( isset( $whitelist ) && $staticInitialised == $globalContext ) { + return $whitelist; + } + $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); if ( $wgAllowRdfaAttributes ) { @@ -1500,7 +1448,7 @@ class Sanitizer { # Numbers refer to sections in HTML 4.01 standard describing the element. 
# See: http://www.w3.org/TR/html4/ - $whitelist = array ( + $whitelist = array( # 7.5.4 'div' => $block, 'center' => $common, # deprecated @@ -1632,7 +1580,28 @@ class Sanitizer { # HTML 5 section 4.6 'bdi' => $common, + ); + + if ( $wgHtml5 ) { + # HTML5 elements, defined by: + # http://www.whatwg.org/specs/web-apps/current-work/multipage/ + $whitelist += array( + 'data' => array_merge( $common, array( 'value' ) ), + 'time' => array_merge( $common, array( 'datetime' ) ), + 'mark' => $common, + + // meta and link are only permitted by removeHTMLtags when Microdata + // is enabled so we don't bother adding a conditional to hide these + // Also meta and link are only valid in WikiText as Microdata elements + // (ie: validateTag rejects tags missing the attributes needed for Microdata) + // So we don't bother including $common attributes that have no purpose. + 'meta' => array( 'itemprop', 'content' ), + 'link' => array( 'itemprop', 'href' ), ); + } + + $staticInitialised = $globalContext; + return $whitelist; }