Disallow css attr() with url type
[lhc/web/wiklou.git] / includes / Sanitizer.php
index d52bc07..7cd21d8 100644 (file)
@@ -363,14 +363,14 @@ class Sanitizer {
         * @return array
         */
        public static function getRecognizedTagData( $extratags = [], $removetags = [] ) {
-               global $wgAllowMicrodataAttributes, $wgAllowImageTag;
+               global $wgAllowImageTag;
 
                static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
                        $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
 
                // Base our staticInitialised variable off of the global config state so that if the globals
                // are changed (like in the screwed up test system) we will re-initialise the settings.
-               $globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
+               $globalContext = $wgAllowImageTag;
                if ( !$staticInitialised || $staticInitialised != $globalContext ) {
                        $htmlpairsStatic = [ # Tags that must be closed
                                'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
@@ -381,15 +381,18 @@ class Sanitizer {
                                'kbd', 'samp', 'data', 'time', 'mark'
                        ];
                        $htmlsingle = [
-                               'br', 'wbr', 'hr', 'li', 'dt', 'dd'
+                               'br', 'wbr', 'hr', 'li', 'dt', 'dd', 'meta', 'link'
                        ];
-                       $htmlsingleonly = [ # Elements that cannot have close tags
-                               'br', 'wbr', 'hr'
+
+                       # Elements that cannot have close tags. This is (not coincidentally)
+                       # also the list of tags for which the HTML 5 parsing algorithm
+                       # requires you to "acknowledge the token's self-closing flag", i.e.
+                       # a self-closing tag like <br/> is not an HTML 5 parse error only
+                       # for this list.
+                       $htmlsingleonly = [
+                               'br', 'wbr', 'hr', 'meta', 'link'
                        ];
-                       if ( $wgAllowMicrodataAttributes ) {
-                               $htmlsingle[] = $htmlsingleonly[] = 'meta';
-                               $htmlsingle[] = $htmlsingleonly[] = 'link';
-                       }
+
                        $htmlnest = [ # Tags that can be nested--??
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
                                'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span',
@@ -450,10 +453,14 @@ class Sanitizer {
         * @param array|bool $args Arguments for the processing callback
         * @param array $extratags For any extra tags to include
         * @param array $removetags For any tags (default or extra) to exclude
+        * @param callable $warnCallback (Deprecated) Callback allowing the
+        *   addition of a tracking category when bad input is encountered.
+        *   DO NOT ADD NEW PARAMETERS AFTER $warnCallback, since it will be
+        *   removed shortly.
         * @return string
         */
        public static function removeHTMLtags( $text, $processCallback = null,
-               $args = [], $extratags = [], $removetags = []
+               $args = [], $extratags = [], $removetags = [], $warnCallback = null
        ) {
                extract( self::getRecognizedTagData( $extratags, $removetags ) );
 
@@ -540,6 +547,14 @@ class Sanitizer {
                                                        $badtag = true;
                                                #  Is it a self closed htmlpair ? (bug 5487)
                                                } elseif ( $brace == '/>' && isset( $htmlpairs[$t] ) ) {
+                                                       // Eventually we'll just remove the self-closing
+                                                       // slash, in order to be consistent with HTML5
+                                                       // semantics.
+                                                       // $brace = '>';
+                                                       // For now, let's just warn authors to clean up.
+                                                       if ( is_callable( $warnCallback ) ) {
+                                                               call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
+                                                       }
                                                        $badtag = true;
                                                } elseif ( isset( $htmlsingleonly[$t] ) ) {
                                                        # Hack to force empty tag for unclosable elements
@@ -604,12 +619,29 @@ class Sanitizer {
                                                        call_user_func_array( $processCallback, [ &$params, $args ] );
                                                }
 
+                                               if ( $brace == '/>' && !( isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
+                                                       // Eventually we'll just remove the self-closing
+                                                       // slash, in order to be consistent with HTML5
+                                                       // semantics.
+                                                       // $brace = '>';
+                                                       // For now, let's just warn authors to clean up.
+                                                       if ( is_callable( $warnCallback ) ) {
+                                                               call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
+                                                       }
+                                               }
                                                if ( !Sanitizer::validateTag( $params, $t ) ) {
                                                        $badtag = true;
                                                }
 
                                                $newparams = Sanitizer::fixTagAttributes( $params, $t );
                                                if ( !$badtag ) {
+                                                       if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) {
+                                                               # Interpret self-closing tags as empty tags even when
+                                                               # HTML 5 would interpret them as start tags. Such input
+                                                               # is commonly seen on Wikimedia wikis with this intention.
+                                                               $brace = "></$t>";
+                                                       }
+
                                                        $rest = str_replace( '>', '&gt;', $rest );
                                                        $text .= "<$slash$t$newparams$brace$rest";
                                                        continue;
@@ -734,15 +766,13 @@ class Sanitizer {
         * @todo Check for unique id attribute :P
         */
        static function validateAttributes( $attribs, $whitelist ) {
-               global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
-
                $whitelist = array_flip( $whitelist );
                $hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
 
                $out = [];
                foreach ( $attribs as $attribute => $value ) {
-                       # allow XML namespace declaration if RDFa is enabled
-                       if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
+                       # Allow XML namespace declaration to allow RDFa
+                       if ( preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
                                if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
                                        $out[$attribute] = $value;
                                }
@@ -817,15 +847,14 @@ class Sanitizer {
                        $out[$attribute] = $value;
                }
 
-               if ( $wgAllowMicrodataAttributes ) {
-                       # itemtype, itemid, itemref don't make sense without itemscope
-                       if ( !array_key_exists( 'itemscope', $out ) ) {
-                               unset( $out['itemtype'] );
-                               unset( $out['itemid'] );
-                               unset( $out['itemref'] );
-                       }
-                       # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
+               # itemtype, itemid, itemref don't make sense without itemscope
+               if ( !array_key_exists( 'itemscope', $out ) ) {
+                       unset( $out['itemtype'] );
+                       unset( $out['itemid'] );
+                       unset( $out['itemref'] );
                }
+               # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
+
                return $out;
        }
 
@@ -986,6 +1015,7 @@ class Sanitizer {
                                | url\s*\(
                                | image\s*\(
                                | image-set\s*\(
+                               | attr\s*\([^)]+[\s,]+url
                        !ix', $value ) ) {
                        return '/* insecure input */';
                }
@@ -1031,12 +1061,14 @@ class Sanitizer {
         * - Double attributes are discarded
         * - Unsafe style attributes are discarded
         * - Prepends space if there are attributes.
+        * - (Optionally) Sorts attributes by name.
         *
         * @param string $text
         * @param string $element
+        * @param bool $sorted Whether to sort the attributes (default: false)
         * @return string
         */
-       static function fixTagAttributes( $text, $element ) {
+       static function fixTagAttributes( $text, $element, $sorted = false ) {
                if ( trim( $text ) == '' ) {
                        return '';
                }
@@ -1044,6 +1076,10 @@ class Sanitizer {
                $decoded = Sanitizer::decodeTagAttributes( $text );
                $stripped = Sanitizer::validateTagAttributes( $decoded, $element );
 
+               if ( $sorted ) {
+                       ksort( $stripped );
+               }
+
                return Sanitizer::safeEncodeTagAttributes( $stripped );
        }
 
@@ -1561,12 +1597,9 @@ class Sanitizer {
         * @return array
         */
        static function setupAttributeWhitelist() {
-               global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
-               static $whitelist, $staticInitialised;
+               static $whitelist;
 
-               $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) );
-
-               if ( $whitelist !== null && $staticInitialised == $globalContext ) {
+               if ( $whitelist !== null ) {
                        return $whitelist;
                }
 
@@ -1586,23 +1619,24 @@ class Sanitizer {
                        'aria-labelledby',
                        'aria-owns',
                        'role',
-               ];
 
-               if ( $wgAllowRdfaAttributes ) {
-                       # RDFa attributes as specified in section 9 of
+                       # RDFa
+                       # These attributes are specified in section 9 of
                        # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
-                       $common = array_merge( $common, [
-                               'about', 'property', 'resource', 'datatype', 'typeof',
-                       ] );
-               }
+                       'about',
+                       'property',
+                       'resource',
+                       'datatype',
+                       'typeof',
 
-               if ( $wgAllowMicrodataAttributes ) {
-                       # add HTML5 microdata tags as specified by
+                       # Microdata. These are specified by
                        # http://www.whatwg.org/html/microdata.html#the-microdata-model
-                       $common = array_merge( $common, [
-                               'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
-                       ] );
-               }
+                       'itemid',
+                       'itemprop',
+                       'itemref',
+                       'itemscope',
+                       'itemtype',
+               ];
 
                $block = array_merge( $common, [ 'align' ] );
                $tablealign = [ 'align', 'valign' ];
@@ -1773,8 +1807,6 @@ class Sanitizer {
                        'link' => [ 'itemprop', 'href' ],
                ];
 
-               $staticInitialised = $globalContext;
-
                return $whitelist;
        }