X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=c1c8daf37affbb40f27444e115068ab392dee7c9;hb=08aec83eb97be75b0fc9bdff67f3652c33ef1cbc;hp=25f4d574099a3ed450c7d95cf06500d1912634de;hpb=54005e7a9d98e608e898d0fc5f8e670e15dbac2a;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 25f4d57409..c1c8daf37a 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,8 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage Parser + * @addtogroup Parser */ /** @@ -29,7 +28,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -316,8 +315,24 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); -/** @package MediaWiki */ +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + +/** + * XHTML sanitizer for MediaWiki + * @addtogroup Parser + */ class Sanitizer { + const NONE = 0; + const INITIAL_NONLETTER = 1; + /** * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments @@ -327,19 +342,23 @@ class Sanitizer { * @param array $args for the processing callback * @return string */ - function removeHTMLtags( $text, $processCallback = null, $args = array() ) { - global $wgUseTidy, $wgUserHtml; - $fname = 'Parser::removeHTMLtags'; - wfProfileIn( $fname ); + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array() ) { + global $wgUseTidy; + + static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised; - if( $wgUserHtml ) { - $htmlpairs = array( # Tags that must be closed + wfProfileIn( __METHOD__ ); + + if ( !$staticInitialised ) { + + $htmlpairs = array_merge( $extratags, array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' - ); + ) ); $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); @@ -350,7 +369,7 @@ class Sanitizer { 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' ); - $tabletags = array( # Can only appear inside table + $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', ); $htmllist = array( # Tags used by list @@ -360,66 +379,92 @@ class Sanitizer { 'li', ); - } else { - $htmlpairs = array(); - $htmlsingle = array(); - $htmlnest = array(); - $tabletags = array(); - } + $htmlsingleallowed = array_merge( $htmlsingle, $tabletags ); + $htmlelements = array_merge( $htmlsingle, $htmlpairs, $htmlnest ); - $htmlsingle = array_merge( $tabletags, $htmlsingle ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs ); + # Convert them all to hashtables for faster lookup + $vars = array( 'htmlpairs', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelements' ); + foreach ( $vars as $var ) { + $$var = array_flip( $$var ); + } + $staticInitialised = true; + } # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); - $text = array_shift( $bits ); + $text = str_replace( '>', '>', array_shift( $bits ) ); if(!$wgUseTidy) { - $tagstack = array(); $tablestack = array(); + $tagstack = $tablestack = array(); foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); + $regs = array(); + if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { + list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + } else { + $slash = $t = $params = $brace = $rest = null; + } $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack if ( $slash ) { # Closing a tag... - if( in_array( $t, $htmlsingleonly ) ) { + if( isset( $htmlsingleonly[$t] ) ) { $badtag = 1; } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { - @array_push( $tagstack, $ot ); - #
  • can be nested in