X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=0cb5b0b8bcb631cb35ad69dbfc587d7089213a3e;hb=424e25e9a9e7dafbaeded0cbf6e73b422958583a;hp=25f4d574099a3ed450c7d95cf06500d1912634de;hpb=54005e7a9d98e608e898d0fc5f8e670e15dbac2a;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 25f4d57409..0cb5b0b8bc 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -20,8 +20,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * http://www.gnu.org/copyleft/gpl.html * - * @package MediaWiki - * @subpackage Parser + * @file + * @ingroup Parser */ /** @@ -29,7 +29,7 @@ * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences */ define( 'MW_CHAR_REFS_REGEX', - '/&([A-Za-z0-9]+); + '/&([A-Za-z0-9\x80-\xff]+); |&\#([0-9]+); |&\#x([0-9A-Za-z]+); |&\#X([0-9A-Za-z]+); @@ -316,29 +316,48 @@ $wgHtmlEntities = array( 'zwj' => 8205, 'zwnj' => 8204 ); -/** @package MediaWiki */ +/** + * Character entity aliases accepted by MediaWiki + */ +global $wgHtmlEntityAliases; +$wgHtmlEntityAliases = array( + 'רלמ' => 'rlm', + 'رلم' => 'rlm', +); + + +/** + * XHTML sanitizer for MediaWiki + * @ingroup Parser + */ class Sanitizer { /** * Cleans up HTML, removes dangerous tags and attributes, and * removes HTML comments * @private - * @param string $text - * @param callback $processCallback to do any variable or parameter replacements in HTML attribute values - * @param array $args for the processing callback + * @param $text String + * @param $processCallback Callback to do any variable or parameter replacements in HTML attribute values + * @param $args Array for the processing callback + * @param $extratags Array for any extra tags to include + * @param $removetags Array for any tags (default or extra) to exclude * @return string */ - function removeHTMLtags( $text, $processCallback = null, $args = array() ) { - global $wgUseTidy, $wgUserHtml; - $fname = 'Parser::removeHTMLtags'; - 
wfProfileIn( $fname ); + static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) { + global $wgUseTidy; + + static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, + $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; + + wfProfileIn( __METHOD__ ); + + if ( !$staticInitialised ) { - if( $wgUserHtml ) { - $htmlpairs = array( # Tags that must be closed + $htmlpairsStatic = array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', - 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u' + 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr' ); $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' @@ -350,7 +369,7 @@ class Sanitizer { 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' ); - $tabletags = array( # Can only appear inside table + $tabletags = array( # Can only appear inside table, we will close them 'td', 'th', 'tr', ); $htmllist = array( # Tags used by list @@ -360,66 +379,97 @@ class Sanitizer { 'li', ); - } else { - $htmlpairs = array(); - $htmlsingle = array(); - $htmlnest = array(); - $tabletags = array(); - } + $htmlsingleallowed = array_unique( array_merge( $htmlsingle, $tabletags ) ); + $htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) ); - $htmlsingle = array_merge( $tabletags, $htmlsingle ); - $htmlelements = array_merge( $htmlsingle, $htmlpairs ); + # Convert them all to hashtables for faster lookup + $vars = array( 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags', + 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' ); + foreach ( $vars as $var ) { + $$var = array_flip( $$var ); + } + 
$staticInitialised = true; + } + # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays + $extratags = array_flip( $extratags ); + $removetags = array_flip( $removetags ); + $htmlpairs = array_merge( $extratags, $htmlpairsStatic ); + $htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ) , $removetags ); # Remove HTML comments $text = Sanitizer::removeHTMLcomments( $text ); $bits = explode( '<', $text ); - $text = array_shift( $bits ); + $text = str_replace( '>', '&gt;', array_shift( $bits ) ); if(!$wgUseTidy) { - $tagstack = array(); $tablestack = array(); + $tagstack = $tablestack = array(); foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); + $regs = array(); + if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) { + list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; + } else { + $slash = $t = $params = $brace = $rest = null; + } $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { # Check our stack if ( $slash ) { # Closing a tag... - if( in_array( $t, $htmlsingleonly ) ) { + if( isset( $htmlsingleonly[$t] ) ) { $badtag = 1; } elseif ( ( $ot = @array_pop( $tagstack ) ) != $t ) { - @array_push( $tagstack, $ot ); - #
<li> can be nested in