X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=abc11f87b4b19dc5f8d20a7027ed2c22b90fe3f3;hb=a80f428025003d7be531e40655625ddf85c29e89;hp=51c751cef324c42ff9c6a4da1882d1c04fdf987f;hpb=3a6ac5a3c3c59c15d7034b914f9549ce60fb7712;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 51c751cef3..abc11f87b4 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,7 +20,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @addtogroup Parser + * @file + * @ingroup Parser */ /** @@ -28,7 +29,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -42,7 +43,7 @@ define( 'MW_CHAR_REFS_REGEX', $attrib = '[A-Za-z0-9]'; $space = '[\x09\x0a\x0d\x20]'; define( 'MW_ATTRIBS_REGEX', - "/(?:^|$space)($attrib+) + "/(?:^|$space)((?:xml:|xmlns:)?$attrib+) ($space*=$space* (?: # The attribute value: quoted or alone @@ -55,6 +56,16 @@ define( 'MW_ATTRIBS_REGEX', ) )?(?=$space|\$)/sx" ); +/** + * Regular expression to match URIs that could trigger script execution + */ +define( 'MW_EVIL_URI_PATTERN', '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i' ); + +/** + * Regular expression to match namespace attributes + */ +define( 'MW_XMLNS_ATTRIBUTE_PATTRN', "/^xmlns:$attrib+$/" ); + /** * List of all named character entities defined in HTML 4.01 * http://www.w3.org/TR/html4/sgml/entities.html @@ -315,107 +326,133 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + +/** + * XHTML sanitizer for MediaWiki + * @ingroup Parser + */ class Sanitizer { /** * Cleans up HTML, removes dangerous tags and attributes, and * 
removes HTML comments * @private - * @param string $text - * @param callback $processCallback to do any variable or parameter replacements in HTML attribute values - * @param array $args for the processing callback + * @param $text String + * @param $processCallback Callback to do any variable or parameter replacements in HTML attribute values + * @param $args Array for the processing callback + * @param $extratags Array for any extra tags to include + * @param $removetags Array for any tags (default or extra) to exclude * @return string */ - static function removeHTMLtags( $text, $processCallback = null, $args = array() ) { - global $wgUseTidy, $wgUserHtml; + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) { + global $wgUseTidy; - static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, - $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; + static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; wfProfileIn( __METHOD__ ); if ( !$staticInitialised ) { - if( $wgUserHtml ) { - $htmlpairs = array( # Tags that must be closed - 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', - 'strike', 'strong', 'tt', 'var', 'div', 'center', - 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', - 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' - ); - $htmlsingle = array( - 'br', 'hr', 'li', 'dt', 'dd' - ); - $htmlsingleonly = array( # Elements that cannot have close tags - 'br', 'hr' - ); - $htmlnest = array( # Tags that can be nested--?? 
- 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', - 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' - ); - $tabletags = array( # Can only appear inside table, we will close them - 'td', 'th', 'tr', - ); - $htmllist = array( # Tags used by list - 'ul','ol', - ); - $listtags = array( # Tags that can appear in a list - 'li', - ); - } else { - $htmlpairs = array(); - $htmlsingle = array(); - $htmlnest = array(); - $tabletags = array(); + $htmlpairsStatic = array( # Tags that must be closed + 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', + 'strike', 'strong', 'tt', 'var', 'div', 'center', + 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', + 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr' + ); + $htmlsingle = array( + 'br', 'hr', 'li', 'dt', 'dd' + ); + $htmlsingleonly = array( # Elements that cannot have close tags + 'br', 'hr' + ); + $htmlnest = array( # Tags that can be nested--?? 
+ 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', + 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' + ); + $tabletags = array( # Can only appear inside table, we will close them + 'td', 'th', 'tr', + ); + $htmllist = array( # Tags used by list + 'ul','ol', + ); + $listtags = array( # Tags that can appear in a list + 'li', + ); + + global $wgAllowImageTag; + if ( $wgAllowImageTag ) { + $htmlsingle[] = 'img'; + $htmlsingleonly[] = 'img'; } - $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); + $htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) ); + $htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) ); # Convert them all to hashtables for faster lookup - $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', - 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' ); + $vars = array( 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' ); foreach ( $vars as $var ) { $$var = array_flip( $$var ); } $staticInitialised = true; } + # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays + $extratags = array_flip( $extratags ); + $removetags = array_flip( $removetags ); + $htmlpairs = array_merge( $extratags, $htmlpairsStatic ); + $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ) , $removetags ); # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); $text = str_replace( '>', '>', array_shift( $bits ) ); - if(!$wgUseTidy) { + if ( !$wgUseTidy ) { $tagstack = $tablestack = array(); foreach ( $bits as $x ) { $regs = array(); + # $slash: Does the current element start with a '/'? 
+ # $t: Current element name + # $params: String between element name and > + # $brace: Ending '>' or '/>' + # $rest: Everything until the next element of $bits if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; } else { $slash = $t = $params = $brace = $rest = null; } - $badtag = 0 ; + $badtag = false; if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack - if ( $slash ) { - # Closing a tag... - if( isset( $htmlsingleonly[$t] ) ) { - $badtag = 1; - } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { + if ( $slash && isset( $htmlsingleonly[$t] ) ) { + $badtag = true; + } elseif ( $slash ) { + # Closing a tag... is it the one we just opened? + $ot = @array_pop( $tagstack ); + if ( $ot != $t ) { if ( isset( $htmlsingleallowed[$ot] ) ) { # Pop all elements with an optional close tag # and see if we find a match below them $optstack = array(); - array_push ($optstack, $ot); - while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) && - isset( $htmlsingleallowed[$ot] ) ) - { - array_push ($optstack, $ot); + array_push( $optstack, $ot ); + $ot = @array_pop( $tagstack ); + while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) { + array_push( $optstack, $ot ); + $ot = @array_pop( $tagstack ); } if ( $t != $ot ) { - # No match. Push the optinal elements back again - $badtag = 1; + # No match. Push the optional elements back again + $badtag = true; while ( $ot = @array_pop( $optstack ) ) { array_push( $tagstack, $ot ); } @@ -423,8 +460,8 @@ class Sanitizer { } else { @array_push( $tagstack, $ot ); #
  <li> can be nested in