X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=8f1fc99fcde07fec42629d7b934960a3154891fd;hb=30d8c9a42d23e01d82f791a7837e3f4bc3f53dbd;hp=d52bc07324a5075a46f38fff1b144fef6c285a3a;hpb=179e55baffa103017d68fdc6037d92153218e46d;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index d52bc07324..8f1fc99fcd 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -363,14 +363,14 @@ class Sanitizer { * @return array */ public static function getRecognizedTagData( $extratags = [], $removetags = [] ) { - global $wgAllowMicrodataAttributes, $wgAllowImageTag; + global $wgAllowImageTag; static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; // Base our staticInitialised variable off of the global config state so that if the globals // are changed (like in the screwed up test system) we will re-initialise the settings. - $globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) ); + $globalContext = $wgAllowImageTag; if ( !$staticInitialised || $staticInitialised != $globalContext ) { $htmlpairsStatic = [ # Tags that must be closed 'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', @@ -381,15 +381,18 @@ class Sanitizer { 'kbd', 'samp', 'data', 'time', 'mark' ]; $htmlsingle = [ - 'br', 'wbr', 'hr', 'li', 'dt', 'dd' + 'br', 'wbr', 'hr', 'li', 'dt', 'dd', 'meta', 'link' ]; - $htmlsingleonly = [ # Elements that cannot have close tags - 'br', 'wbr', 'hr' + + # Elements that cannot have close tags. This is (not coincidentally) + # also the list of tags for which the HTML 5 parsing algorithm + # requires you to "acknowledge the token's self-closing flag", i.e. + # a self-closing tag like
<br /> is not an HTML 5 parse error only + # for this list. + $htmlsingleonly = [ + 'br', 'wbr', 'hr', 'meta', 'link' ]; - if ( $wgAllowMicrodataAttributes ) { - $htmlsingle[] = $htmlsingleonly[] = 'meta'; - $htmlsingle[] = $htmlsingleonly[] = 'link'; - } + $htmlnest = [ # Tags that can be nested--?? 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span', @@ -450,10 +453,14 @@ class Sanitizer { * @param array|bool $args Arguments for the processing callback * @param array $extratags For any extra tags to include * @param array $removetags For any tags (default or extra) to exclude + * @param callable $warnCallback (Deprecated) Callback allowing the + * addition of a tracking category when bad input is encountered. + * DO NOT ADD NEW PARAMETERS AFTER $warnCallback, since it will be + * removed shortly. * @return string */ public static function removeHTMLtags( $text, $processCallback = null, - $args = [], $extratags = [], $removetags = [] + $args = [], $extratags = [], $removetags = [], $warnCallback = null ) { extract( self::getRecognizedTagData( $extratags, $removetags ) ); @@ -540,6 +547,14 @@ class Sanitizer { $badtag = true; #  Is it a self closed htmlpair ? (bug 5487) } elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) { + // Eventually we'll just remove the self-closing + // slash, in order to be consistent with HTML5 + // semantics. + // $brace = '>'; + // For now, let's just warn authors to clean up. 
+ if ( is_callable( $warnCallback ) ) { + call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] ); + } $badtag = true; } elseif ( isset( $htmlsingleonly[$t] ) ) { # Hack to force empty tag for unclosable elements @@ -604,12 +619,29 @@ class Sanitizer { call_user_func_array( $processCallback, [ &$params, $args ] ); } + if ( $brace == '/>' && !( isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) { + // Eventually we'll just remove the self-closing + // slash, in order to be consistent with HTML5 + // semantics. + // $brace = '>'; + // For now, let's just warn authors to clean up. + if ( is_callable( $warnCallback ) ) { + call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] ); + } + } if ( !Sanitizer::validateTag( $params, $t ) ) { $badtag = true; } $newparams = Sanitizer::fixTagAttributes( $params, $t ); if ( !$badtag ) { + if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) { + # Interpret self-closing tags as empty tags even when + # HTML 5 would interpret them as start tags. Such input + # is commonly seen on Wikimedia wikis with this intention. + $brace = "></$t>"; + } + $rest = str_replace( '>', '&gt;', $rest ); $text .= "<$slash$t$newparams$brace$rest"; continue; @@ -734,15 +766,13 @@ class Sanitizer { * @todo Check for unique id attribute :P */ static function validateAttributes( $attribs, $whitelist ) { - global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes; - $whitelist = array_flip( $whitelist ); $hrefExp = '/^(' . wfUrlProtocols() . 
')[^\s]+$/'; $out = []; foreach ( $attribs as $attribute => $value ) { - # allow XML namespace declaration if RDFa is enabled - if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) { + # Allow XML namespace declaration to allow RDFa + if ( preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) { if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) { $out[$attribute] = $value; } @@ -817,15 +847,14 @@ class Sanitizer { $out[$attribute] = $value; } - if ( $wgAllowMicrodataAttributes ) { - # itemtype, itemid, itemref don't make sense without itemscope - if ( !array_key_exists( 'itemscope', $out ) ) { - unset( $out['itemtype'] ); - unset( $out['itemid'] ); - unset( $out['itemref'] ); - } - # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref. + # itemtype, itemid, itemref don't make sense without itemscope + if ( !array_key_exists( 'itemscope', $out ) ) { + unset( $out['itemtype'] ); + unset( $out['itemid'] ); + unset( $out['itemref'] ); } + # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref. + return $out; } @@ -1031,12 +1060,14 @@ class Sanitizer { * - Double attributes are discarded * - Unsafe style attributes are discarded * - Prepends space if there are attributes. + * - (Optionally) Sorts attributes by name. 
* * @param string $text * @param string $element + * @param bool $sorted Whether to sort the attributes (default: false) * @return string */ - static function fixTagAttributes( $text, $element ) { + static function fixTagAttributes( $text, $element, $sorted = false ) { if ( trim( $text ) == '' ) { return ''; } @@ -1044,6 +1075,10 @@ class Sanitizer { $decoded = Sanitizer::decodeTagAttributes( $text ); $stripped = Sanitizer::validateTagAttributes( $decoded, $element ); + if ( $sorted ) { + ksort( $stripped ); + } + return Sanitizer::safeEncodeTagAttributes( $stripped ); } @@ -1561,12 +1596,9 @@ class Sanitizer { * @return array */ static function setupAttributeWhitelist() { - global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes; - static $whitelist, $staticInitialised; + static $whitelist; - $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) ); - - if ( $whitelist !== null && $staticInitialised == $globalContext ) { + if ( $whitelist !== null ) { return $whitelist; } @@ -1586,23 +1618,24 @@ class Sanitizer { 'aria-labelledby', 'aria-owns', 'role', - ]; - if ( $wgAllowRdfaAttributes ) { - # RDFa attributes as specified in section 9 of + # RDFa + # These attributes are specified in section 9 of # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014 - $common = array_merge( $common, [ - 'about', 'property', 'resource', 'datatype', 'typeof', - ] ); - } + 'about', + 'property', + 'resource', + 'datatype', + 'typeof', - if ( $wgAllowMicrodataAttributes ) { - # add HTML5 microdata tags as specified by + # Microdata. 
These are specified by # http://www.whatwg.org/html/microdata.html#the-microdata-model - $common = array_merge( $common, [ - 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype' - ] ); - } + 'itemid', + 'itemprop', + 'itemref', + 'itemscope', + 'itemtype', + ]; $block = array_merge( $common, [ 'align' ] ); $tablealign = [ 'align', 'valign' ]; @@ -1773,8 +1806,6 @@ class Sanitizer { 'link' => [ 'itemprop', 'href' ], ]; - $staticInitialised = $globalContext; - return $whitelist; }