<?php
-
/**
* (X)HTML sanitizer for MediaWiki
*
'zwj' => 8205,
'zwnj' => 8204 );
+/** @package MediaWiki */
class Sanitizer {
/**
* Cleans up HTML, removes dangerous tags and attributes, and
* removes HTML comments
* @access private
* @param string $text
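+ * @param callback $processCallback Optional callback that performs variable or template parameter replacements in HTML attribute values; it receives the tag's attribute text by reference and is skipped when not callable
+ * @param array $args Extra arguments handed to $processCallback as its second parameter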
* @return string
*/
- function removeHTMLtags( $text ) {
+ function removeHTMLtags( $text, $processCallback = null, $args = array() ) {
global $wgUseTidy, $wgUserHtml;
$fname = 'Parser::removeHTMLtags';
wfProfileIn( $fname );
$htmlsingle = array(
'br', 'hr', 'li', 'dt', 'dd'
);
+ $htmlsingleonly = array( # Elements that cannot have close tags
+ 'br', 'hr'
+ );
$htmlnest = array( # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
$tagstack = array(); $tablestack = array();
foreach ( $bits as $x ) {
$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
- preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
+ preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
error_reporting( $prev );
# Check our stack
if ( $slash ) {
# Closing a tag...
- if ( ! in_array( $t, $htmlsingle ) &&
+ if( in_array( $t, $htmlsingleonly ) ) {
+ $badtag = 1;
+ } elseif( !in_array( $t, $htmlsingle ) &&
( $ot = @array_pop( $tagstack ) ) != $t ) {
@array_push( $tagstack, $ot );
$badtag = 1;
} else if ( in_array( $t, $tagstack ) &&
! in_array ( $t , $htmlnest ) ) {
$badtag = 1 ;
+ } elseif( in_array( $t, $htmlsingleonly ) ) {
+ # Hack to force empty tag for uncloseable elements
+ $brace = '/>';
} else if ( ! in_array( $t, $htmlsingle ) ) {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
}
array_push( $tagstack, $t );
}
+
+ # Replace any variables or template parameters with
+ # plaintext results.
+ if( is_callable( $processCallback ) ) {
+ call_user_func_array( $processCallback, array( &$params, $args ) );
+ }
+
# Strip non-approved attributes from the tag
$newparams = Sanitizer::fixTagAttributes( $params, $t );
}
if ( ! $badtag ) {
$rest = str_replace( '>', '>', $rest );
- $text .= "<$slash$t$newparams$brace$rest";
+ $close = ( $brace == '/>' ) ? ' /' : '';
+ $text .= "<$slash$t$newparams$close>$rest";
continue;
}
}
} else {
# this might be possible using tidy itself
foreach ( $bits as $x ) {
- preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
+ preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ if( is_callable( $processCallback ) ) {
+ call_user_func_array( $processCallback, array( &$params, $args ) );
+ }
$newparams = Sanitizer::fixTagAttributes( $params, $t );
$rest = str_replace( '>', '>', $rest );
$text .= "<$slash$t$newparams$brace$rest";
'/(' . URL_PROTOCOLS . '):/',
'\\1:', $value );
- if( !isset( $attribs[$attribute] ) ) {
- $attribs[$attribute] = "$attribute=\"$value\"";
- }
+ // If this attribute was previously set, override it.
+ // Output should only have one attribute of each name.
+ $attribs[$attribute] = "$attribute=\"$value\"";
}
if( empty( $attribs ) ) {
return '';