X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=51c751cef324c42ff9c6a4da1882d1c04fdf987f;hb=4e250a20b9fde4e72128f391fae65760196795fb;hp=49a5f10d3e925d537f38b83fc23f81c80cf6370b;hpb=61af76f260c66bb354e3160c87b24d3f1e4ac37f;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 49a5f10d3e..51c751cef3 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,8 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage Parser + * @addtogroup Parser */ /** @@ -316,7 +315,6 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); -/** @package MediaWiki */ class Sanitizer { /** * Cleans up HTML, removes dangerous tags and attributes, and @@ -330,11 +328,11 @@ class Sanitizer { static function removeHTMLtags( $text, $processCallback = null, $args = array() ) { global $wgUseTidy, $wgUserHtml; - static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; - + wfProfileIn( __METHOD__ ); - + if ( !$staticInitialised ) { if( $wgUserHtml ) { $htmlpairs = array( # Tags that must be closed @@ -354,7 +352,7 @@ class Sanitizer { 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' ); - $tabletags = array( # Can only appear inside table + $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', ); $htmllist = array( # Tags used by list @@ -386,14 +384,16 @@ class Sanitizer { # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); - $text = array_shift( $bits ); + $text = str_replace( '>', '>', array_shift( $bits ) ); if(!$wgUseTidy) { $tagstack = $tablestack = array(); foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); + $regs = array(); + if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { + list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + } else { + $slash = $t = $params = $brace = $rest = null; + } $badtag = 0 ; if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { @@ -451,6 +451,10 @@ class Sanitizer { } else if( isset( $htmlsingle[$t] ) ) { # Hack to not close $htmlsingle tags $brace = NULL; + } else if( isset( $tabletags[$t] ) + && in_array($t ,$tagstack) ) { + // New table tag but forgot to close the previous one + $text .= ""; } else { if ( $t == 'table' ) { array_push( $tablestack, $tagstack ); @@ -470,7 +474,7 @@ class Sanitizer { } if ( ! $badtag ) { $rest = str_replace( '>', '>', $rest ); - $close = ( $brace == '/>' ) ? ' /' : ''; + $close = ( $brace == '/>' && !$slash ) ? ' /' : ''; $text .= "<$slash$t$newparams$close>$rest"; continue; } @@ -487,7 +491,7 @@ class Sanitizer { foreach ( $bits as $x ) { preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs ); - @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; + @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { if( is_callable( $processCallback ) ) { call_user_func_array( $processCallback, array( &$params, $args ) ); @@ -643,15 +647,15 @@ class Sanitizer { if( trim( $text ) == '' ) { return ''; } - + $stripped = Sanitizer::validateTagAttributes( Sanitizer::decodeTagAttributes( $text ), $element ); - + $attribs = array(); foreach( $stripped as $attribute => $value ) { $encAttribute = htmlspecialchars( $attribute ); $encValue = Sanitizer::safeEncodeAttribute( $value ); - + $attribs[] = "$encAttribute=\"$encValue\""; } return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : ''; @@ -664,7 +668,7 @@ class Sanitizer { */ static function encodeAttribute( $text ) { $encValue = htmlspecialchars( $text ); - + // Whitespace is normalized during attribute decoding, // so if we've been passed non-spaces we must encode them // ahead of time or they won't be preserved. @@ -673,10 +677,10 @@ class Sanitizer { "\r" => ' ', "\t" => ' ', ) ); - + return $encValue; } - + /** * Encode an attribute value for HTML tags, with extra armoring * against further wiki processing. @@ -685,7 +689,7 @@ class Sanitizer { */ static function safeEncodeAttribute( $text ) { $encValue = Sanitizer::encodeAttribute( $text ); - + # Templates and links may be expanded in later parsing, # creating invalid or dangerous output. Suppress this. $encValue = strtr( $encValue, array( @@ -741,7 +745,7 @@ class Sanitizer { * Given a value, escape it so that it can be used as a CSS class and * return it. * - * TODO: For extra validity, input should be validated UTF-8. + * @todo For extra validity, input should be validated UTF-8. * * @link http://www.w3.org/TR/CSS21/syndata.html Valid characters/format * @@ -793,11 +797,11 @@ class Sanitizer { foreach( $pairs as $set ) { $attribute = strtolower( $set[1] ); $value = Sanitizer::getTagAttributeCallback( $set ); - + // Normalize whitespace $value = preg_replace( '/[\t\r\n ]+/', ' ', $value ); $value = trim( $value ); - + // Decode character references $attribs[$attribute] = Sanitizer::decodeCharReferences( $value ); } @@ -1213,7 +1217,7 @@ class Sanitizer { $out .= "]>\n"; return $out; } - + static function cleanUrl( $url, $hostname=true ) { # Normalize any HTML entities in input. They will be # re-escaped by makeExternalLink(). @@ -1221,11 +1225,12 @@ class Sanitizer { # Escape any control characters introduced by the above step $url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url ); - + # Validate hostname portion + $matches = array(); if( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) { - list( $whole, $protocol, $host, $rest ) = $matches; - + list( /* $whole */, $protocol, $host, $rest ) = $matches; + // Characters that will be ignored in IDNs. // http://tools.ietf.org/html/3454#section-3.1 // Strip them before further processing so blacklists and such work. @@ -1244,11 +1249,11 @@ class Sanitizer { \xe2\x80\x8d| # 200d ZERO WIDTH JOINER [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe00f VARIATION SELECTOR-1-16 /xuD"; - + $host = preg_replace( $strip, '', $host ); - + // @fixme: validate hostnames here - + return $protocol . $host . $rest; } else { return $url;