X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=abc11f87b4b19dc5f8d20a7027ed2c22b90fe3f3;hb=a80f428025003d7be531e40655625ddf85c29e89;hp=e2f0f272a8de0d3633a88e89b0c042ee1876a86e;hpb=8c3822b9b7074ff9d4f7692dd5f5b4fea678c475;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index e2f0f272a8..abc11f87b4 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,7 +20,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @addtogroup Parser + * @file + * @ingroup Parser */ /** @@ -42,7 +43,7 @@ define( 'MW_CHAR_REFS_REGEX', $attrib = '[A-Za-z0-9]'; $space = '[\x09\x0a\x0d\x20]'; define( 'MW_ATTRIBS_REGEX', - "/(?:^|$space)($attrib+) + "/(?:^|$space)((?:xml:|xmlns:)?$attrib+) ($space*=$space* (?: # The attribute value: quoted or alone @@ -55,6 +56,16 @@ define( 'MW_ATTRIBS_REGEX', ) )?(?=$space|\$)/sx" ); +/** + * Regular expression to match URIs that could trigger script execution + */ +define( 'MW_EVIL_URI_PATTERN', '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i' ); + +/** + * Regular expression to match namespace attributes + */ +define( 'MW_XMLNS_ATTRIBUTE_PATTRN', "/^xmlns:$attrib+$/" ); + /** * List of all named character entities defined in HTML 4.01 * http://www.w3.org/TR/html4/sgml/entities.html @@ -327,35 +338,37 @@ $wgHtmlEntityAliases = array( /** * XHTML sanitizer for MediaWiki - * @addtogroup Parser + * @ingroup Parser */ class Sanitizer { /** * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments * @private - * @param string $text - * @param callback $processCallback to do any variable or parameter replacements in HTML attribute values - * @param array $args for the processing callback + * @param $text String + * @param $processCallback Callback to do any variable or parameter replacements in HTML attribute values + * @param $args Array for the processing callback + * @param 
$extratags Array for any extra tags to include + * @param $removetags Array for any tags (default or extra) to exclude * @return string */ - static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array() ) { + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) { global $wgUseTidy; - static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, - $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; + static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; wfProfileIn( __METHOD__ ); if ( !$staticInitialised ) { - $htmlpairs = array_merge( $extratags, array( # Tags that must be closed + $htmlpairsStatic = array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', - 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' - ) ); + 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr' + ); $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); @@ -376,53 +389,70 @@ class Sanitizer { 'li', ); - $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); + global $wgAllowImageTag; + if ( $wgAllowImageTag ) { + $htmlsingle[] = 'img'; + $htmlsingleonly[] = 'img'; + } + + $htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) ); + $htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) ); # Convert them all to hashtables for faster lookup - $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', - 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' ); + $vars = array( 
'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' ); foreach ( $vars as $var ) { $$var = array_flip( $$var ); } $staticInitialised = true; } + # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays + $extratags = array_flip( $extratags ); + $removetags = array_flip( $removetags ); + $htmlpairs = array_merge( $extratags, $htmlpairsStatic ); + $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ) , $removetags ); # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); $text = str_replace( '>', '>', array_shift( $bits ) ); - if(!$wgUseTidy) { + if ( !$wgUseTidy ) { $tagstack = $tablestack = array(); foreach ( $bits as $x ) { $regs = array(); + # $slash: Does the current element start with a '/'? + # $t: Current element name + # $params: String between element name and > + # $brace: Ending '>' or '/>' + # $rest: Everything until the next element of $bits if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; } else { $slash = $t = $params = $brace = $rest = null; } - $badtag = 0 ; + $badtag = false; if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack - if ( $slash ) { - # Closing a tag... - if( isset( $htmlsingleonly[$t] ) ) { - $badtag = 1; - } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { + if ( $slash && isset( $htmlsingleonly[$t] ) ) { + $badtag = true; + } elseif ( $slash ) { + # Closing a tag... is it the one we just opened? 
+ $ot = @array_pop( $tagstack ); + if ( $ot != $t ) { if ( isset( $htmlsingleallowed[$ot] ) ) { # Pop all elements with an optional close tag # and see if we find a match below them $optstack = array(); - array_push ($optstack, $ot); - while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) && - isset( $htmlsingleallowed[$ot] ) ) - { - array_push ($optstack, $ot); + array_push( $optstack, $ot ); + $ot = @array_pop( $tagstack ); + while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) { + array_push( $optstack, $ot ); + $ot = @array_pop( $tagstack ); } if ( $t != $ot ) { - # No match. Push the optinal elements back again - $badtag = 1; + # No match. Push the optional elements back again + $badtag = true; while ( $ot = @array_pop( $optstack ) ) { array_push( $tagstack, $ot ); } @@ -430,8 +460,8 @@ class Sanitizer { } else { @array_push( $tagstack, $ot ); #
<li> can be nested in