X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=c1c8daf37affbb40f27444e115068ab392dee7c9;hb=08aec83eb97be75b0fc9bdff67f3652c33ef1cbc;hp=f2dde9281a7a5f7a61abd53e239cf6b6b8319374;hpb=fd912a148b69ff488593ebd6d5e8da3b5f1cab08;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index f2dde9281a..c1c8daf37a 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,8 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage Parser + * @addtogroup Parser */ /** @@ -29,7 +28,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -316,8 +315,24 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); -/** @package MediaWiki */ +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + +/** + * XHTML sanitizer for MediaWiki + * @addtogroup Parser + */ class Sanitizer { + const NONE = 0; + const INITIAL_NONLETTER = 1; + /** * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments @@ -327,19 +342,23 @@ class Sanitizer { * @param array $args for the processing callback * @return string */ - static function removeHTMLtags( $text, $processCallback = null, $args = array() ) { - global $wgUseTidy, $wgUserHtml; - $fname = 'Parser::removeHTMLtags'; - wfProfileIn( $fname ); + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array() ) { + global $wgUseTidy; + + static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; - if( $wgUserHtml ) { - $htmlpairs = array( # Tags that must be closed + wfProfileIn( __METHOD__ ); + + if ( !$staticInitialised ) { + + $htmlpairs = array_merge( $extratags, array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' - ); + ) ); $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); @@ -350,7 +369,7 @@ class Sanitizer { 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' ); - $tabletags = array( # Can only appear inside table + $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', ); $htmllist = array( # Tags used by list @@ -360,44 +379,48 @@ class Sanitizer { 'li', ); - } else { - $htmlpairs = array(); - $htmlsingle = array(); - $htmlnest = array(); - $tabletags = array(); - } + $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); + $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); - $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); + # Convert them all to hashtables for faster lookup + $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' ); + foreach ( $vars as $var ) { + $$var = array_flip( $$var ); + } + $staticInitialised = true; + } # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); - $text = array_shift( $bits ); + $text = str_replace( '>', '>', array_shift( $bits ) ); if(!$wgUseTidy) { - $tagstack = array(); $tablestack = array(); + $tagstack = $tablestack = array(); foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); + $regs = array(); + if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { + list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + } else { + $slash = $t = $params = $brace = $rest = null; + } $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack if ( $slash ) { # Closing a tag... - if( in_array( $t, $htmlsingleonly ) ) { + if( isset( $htmlsingleonly[$t] ) ) { $badtag = 1; } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { - if ( in_array($ot, $htmlsingleallowed) ) { + if ( isset( $htmlsingleallowed[$ot] ) ) { # Pop all elements with an optional close tag # and see if we find a match below them $optstack = array(); array_push ($optstack, $ot); while ( ( ( $ot = @array_pop( $tagstack ) ) != $t ) && - in_array($ot, $htmlsingleallowed) ) { + isset( $htmlsingleallowed[$ot] ) ) + { array_push ($optstack, $ot); } if ( $t != $ot ) { @@ -410,7 +433,7 @@ class Sanitizer { } else { @array_push( $tagstack, $ot ); #
  • can be nested in