X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=28b1c27576d08c0dae84c4323b242209d2aa8833;hb=75b58962c6b1fa158c31019e8b7d13453d11dddd;hp=784541940cbb9dca43406016bf65a4e161bb4dae;hpb=145f1819b8e7bfa50ffcee39de450c1a230b04cc;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 784541940c..28b1c27576 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,8 +20,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage Parser + * @file + * @ingroup Parser */ /** @@ -29,7 +29,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -316,8 +316,24 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); -/** @package MediaWiki */ +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + +/** + * XHTML sanitizer for MediaWiki + * @ingroup Parser + */ class Sanitizer { + const NONE = 0; + const INITIAL_NONLETTER = 1; + /** * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments @@ -327,19 +343,23 @@ class Sanitizer { * @param array $args for the processing callback * @return string */ - function removeHTMLtags( $text, $processCallback = null, $args = array() ) { - global $wgUseTidy, $wgUserHtml; - $fname = 'Parser::removeHTMLtags'; - wfProfileIn( $fname ); + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array() ) { + global $wgUseTidy; + + static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; + + wfProfileIn( __METHOD__ ); - if( $wgUserHtml ) { - $htmlpairs = array( # Tags that must be closed + if ( !$staticInitialised ) { + + $htmlpairs = array_merge( $extratags, array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' - ); + ) ); $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); @@ -350,7 +370,7 @@ class Sanitizer { 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' ); - $tabletags = array( # Can only appear inside table + $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', ); $htmllist = array( # Tags used by list @@ -360,66 +380,92 @@ class Sanitizer { 'li', ); - } else { - $htmlpairs = array(); - $htmlsingle = array(); - $htmlnest = array(); - $tabletags = array(); - } + $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); + $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); - $htmlsingle = array_merge( $tabletags, $htmlsingle ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs ); + # Convert them all to hashtables for faster lookup + $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' ); + foreach ( $vars as $var ) { + $$var = array_flip( $$var ); + } + $staticInitialised = true; + } # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); - $text = array_shift( $bits ); + $text = str_replace( '>', '>', array_shift( $bits ) ); if(!$wgUseTidy) { - $tagstack = array(); $tablestack = array(); + $tagstack = $tablestack = array(); foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); + $regs = array(); + if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { + list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + } else { + $slash = $t = $params = $brace = $rest = null; + } $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack if ( $slash ) { # Closing a tag... - if( in_array( $t, $htmlsingleonly ) ) { + if( isset( $htmlsingleonly[$t] ) ) { $badtag = 1; } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { - @array_push( $tagstack, $ot ); - #
  • can be nested in