X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=2def06a9dd4d1ce27add861c92695c9c4ae7568e;hb=50ea235528289fa3b7b31a2281c6c7683053b7f4;hp=8424432f94595ca1edd415300761f1bda1331702;hpb=01c3bf3431e9754b79e4a4a31fa74ce9e6616514;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 8424432f94..2def06a9dd 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -339,8 +339,8 @@ class Sanitizer { */ static function getAttribsRegex() { if ( self::$attribsRegex === null ) { - $attribFirst = '[:A-Z_a-z0-9]'; - $attrib = '[:A-Z_a-z-.0-9]'; + $attribFirst = "[:_\p{L}\p{N}]"; + $attrib = "[:_\.\-\p{L}\p{N}]"; $space = '[\x09\x0a\x0c\x0d\x20]'; self::$attribsRegex = "/(?:^|$space)({$attribFirst}{$attrib}*) @@ -351,7 +351,7 @@ class Sanitizer { | '([^']*)(?:'|\$) | (((?!$space|>).)*) ) - )?(?=$space|\$)/sx"; + )?(?=$space|\$)/sxu"; } return self::$attribsRegex; } @@ -465,7 +465,7 @@ class Sanitizer { extract( self::getRecognizedTagData( $extratags, $removetags ) ); # Remove HTML comments - $text = Sanitizer::removeHTMLcomments( $text ); + $text = self::removeHTMLcomments( $text ); $bits = explode( '<', $text ); $text = str_replace( '>', '>', array_shift( $bits ) ); if ( !MWTidy::isEnabled() ) { @@ -583,12 +583,12 @@ class Sanitizer { call_user_func_array( $processCallback, [ &$params, $args ] ); } - if ( !Sanitizer::validateTag( $params, $t ) ) { + if ( !self::validateTag( $params, $t ) ) { $badtag = true; } # Strip non-approved attributes from the tag - $newparams = Sanitizer::fixTagAttributes( $params, $t ); + $newparams = self::fixTagAttributes( $params, $t ); } if ( !$badtag ) { $rest = str_replace( '>', '>', $rest ); @@ -629,11 +629,11 @@ class Sanitizer { call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] ); } } - if ( !Sanitizer::validateTag( $params, $t ) ) { + if ( !self::validateTag( $params, $t ) ) { $badtag = true; } - $newparams = Sanitizer::fixTagAttributes( $params, $t ); + $newparams = self::fixTagAttributes( $params, $t ); if ( !$badtag ) { if ( $brace === '/>' && !isset( $htmlsingleonly[$t] ) ) { # Interpret self-closing tags as empty tags even when @@ -710,7 +710,7 @@ class Sanitizer { * @return bool */ static function validateTag( $params, $element ) { - $params = Sanitizer::decodeTagAttributes( $params ); + $params = self::decodeTagAttributes( $params ); if ( $element == 'meta' || $element == 'link' ) { if ( !isset( $params['itemprop'] ) ) { @@ -746,8 +746,8 @@ class Sanitizer { * @todo Check for unique id attribute :P */ static function validateTagAttributes( $attribs, $element ) { - return Sanitizer::validateAttributes( $attribs, - Sanitizer::attributeWhitelist( $element ) ); + return self::validateAttributes( $attribs, + self::attributeWhitelist( $element ) ); } /** @@ -795,12 +795,12 @@ class Sanitizer { # Strip javascript "expression" from stylesheets. # https://msdn.microsoft.com/en-us/library/ms537634.aspx if ( $attribute == 'style' ) { - $value = Sanitizer::checkCss( $value ); + $value = self::checkCss( $value ); } # Escape HTML id attributes if ( $attribute === 'id' ) { - $value = Sanitizer::escapeId( $value, 'noninitial' ); + $value = self::escapeId( $value, 'noninitial' ); } # Escape HTML id reference lists @@ -809,7 +809,7 @@ class Sanitizer { || $attribute === 'aria-labelledby' || $attribute === 'aria-owns' ) { - $value = Sanitizer::escapeIdReferenceList( $value, 'noninitial' ); + $value = self::escapeIdReferenceList( $value, 'noninitial' ); } // RDFa and microdata properties allow URLs, URIs and/or CURIs. @@ -906,9 +906,8 @@ class Sanitizer { * @return string normalized css */ public static function normalizeCss( $value ) { - // Decode character references like { - $value = Sanitizer::decodeCharReferences( $value ); + $value = self::decodeCharReferences( $value ); // Decode escape sequences and line continuation // See the grammar in the CSS 2 spec, appendix D. @@ -1088,14 +1087,14 @@ class Sanitizer { return ''; } - $decoded = Sanitizer::decodeTagAttributes( $text ); - $stripped = Sanitizer::validateTagAttributes( $decoded, $element ); + $decoded = self::decodeTagAttributes( $text ); + $stripped = self::validateTagAttributes( $decoded, $element ); if ( $sorted ) { ksort( $stripped ); } - return Sanitizer::safeEncodeTagAttributes( $stripped ); + return self::safeEncodeTagAttributes( $stripped ); } /** @@ -1125,7 +1124,7 @@ class Sanitizer { * @return string HTML-encoded text fragment */ static function safeEncodeAttribute( $text ) { - $encValue = Sanitizer::encodeAttribute( $text ); + $encValue = self::encodeAttribute( $text ); # Templates and links may be expanded in later parsing, # creating invalid or dangerous output. Suppress this. @@ -1187,7 +1186,7 @@ class Sanitizer { global $wgExperimentalHtmlIds; $options = (array)$options; - $id = Sanitizer::decodeCharReferences( $id ); + $id = self::decodeCharReferences( $id ); if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) { $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); @@ -1239,7 +1238,7 @@ class Sanitizer { # Escape each token as an id foreach ( $references as &$ref ) { - $ref = Sanitizer::escapeId( $ref, $options ); + $ref = self::escapeId( $ref, $options ); } # Merge the array back to a space delimited list string @@ -1276,7 +1275,7 @@ class Sanitizer { * @return string Escaped input */ static function escapeHtmlAllowEntities( $html ) { - $html = Sanitizer::decodeCharReferences( $html ); + $html = self::decodeCharReferences( $html ); # It seems wise to escape ' as well as ", as a matter of course. Can't # hurt. Use ENT_SUBSTITUTE so that incorrectly truncated multibyte characters # don't cause the entire string to disappear. @@ -1318,14 +1317,14 @@ class Sanitizer { foreach ( $pairs as $set ) { $attribute = strtolower( $set[1] ); - $value = Sanitizer::getTagAttributeCallback( $set ); + $value = self::getTagAttributeCallback( $set ); // Normalize whitespace $value = preg_replace( '/[\t\r\n ]+/', ' ', $value ); $value = trim( $value ); // Decode character references - $attribs[$attribute] = Sanitizer::decodeCharReferences( $value ); + $attribs[$attribute] = self::decodeCharReferences( $value ); } return $attribs; } @@ -1341,7 +1340,7 @@ class Sanitizer { $attribs = []; foreach ( $assoc_array as $attribute => $value ) { $encAttribute = htmlspecialchars( $attribute ); - $encValue = Sanitizer::safeEncodeAttribute( $value ); + $encValue = self::safeEncodeAttribute( $value ); $attribs[] = "$encAttribute=\"$encValue\""; } @@ -1428,11 +1427,11 @@ class Sanitizer { static function normalizeCharReferencesCallback( $matches ) { $ret = null; if ( $matches[1] != '' ) { - $ret = Sanitizer::normalizeEntity( $matches[1] ); + $ret = self::normalizeEntity( $matches[1] ); } elseif ( $matches[2] != '' ) { - $ret = Sanitizer::decCharReference( $matches[2] ); + $ret = self::decCharReference( $matches[2] ); } elseif ( $matches[3] != '' ) { - $ret = Sanitizer::hexCharReference( $matches[3] ); + $ret = self::hexCharReference( $matches[3] ); } if ( is_null( $ret ) ) { return htmlspecialchars( $matches[0] ); @@ -1469,7 +1468,7 @@ class Sanitizer { */ static function decCharReference( $codepoint ) { $point = intval( $codepoint ); - if ( Sanitizer::validateCodepoint( $point ) ) { + if ( self::validateCodepoint( $point ) ) { return sprintf( '&#%d;', $point ); } else { return null; @@ -1482,7 +1481,7 @@ class Sanitizer { */ static function hexCharReference( $codepoint ) { $point = hexdec( $codepoint ); - if ( Sanitizer::validateCodepoint( $point ) ) { + if ( self::validateCodepoint( $point ) ) { return sprintf( '&#x%x;', $point ); } else { return null; @@ -1551,11 +1550,11 @@ class Sanitizer { */ static function decodeCharReferencesCallback( $matches ) { if ( $matches[1] != '' ) { - return Sanitizer::decodeEntity( $matches[1] ); + return self::decodeEntity( $matches[1] ); } elseif ( $matches[2] != '' ) { - return Sanitizer::decodeChar( intval( $matches[2] ) ); + return self::decodeChar( intval( $matches[2] ) ); } elseif ( $matches[3] != '' ) { - return Sanitizer::decodeChar( hexdec( $matches[3] ) ); + return self::decodeChar( hexdec( $matches[3] ) ); } # Last case should be an ampersand by itself return $matches[0]; @@ -1569,7 +1568,7 @@ class Sanitizer { * @private */ static function decodeChar( $codepoint ) { - if ( Sanitizer::validateCodepoint( $codepoint ) ) { + if ( self::validateCodepoint( $codepoint ) ) { return UtfNormal\Utils::codepointToUtf8( $codepoint ); } else { return UtfNormal\Constants::UTF8_REPLACEMENT; @@ -1602,7 +1601,7 @@ class Sanitizer { * @return array */ static function attributeWhitelist( $element ) { - $list = Sanitizer::setupAttributeWhitelist(); + $list = self::setupAttributeWhitelist(); return isset( $list[$element] ) ? $list[$element] : []; @@ -1877,7 +1876,7 @@ class Sanitizer { static function cleanUrl( $url ) { # Normalize any HTML entities in input. They will be # re-escaped by makeExternalLink(). - $url = Sanitizer::decodeCharReferences( $url ); + $url = self::decodeCharReferences( $url ); # Escape any control characters introduced by the above step $url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F\|]/',