dépôts
/
lhc
/
web
/
wiklou.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
update relnotes
[lhc/web/wiklou.git]
/
includes
/
Sanitizer.php
diff --git
a/includes/Sanitizer.php
b/includes/Sanitizer.php
index
4ed2692
..
51c751c
100644
(file)
--- a/
includes/Sanitizer.php
+++ b/
includes/Sanitizer.php
@@
-20,8
+20,7
@@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
- * @package MediaWiki
- * @subpackage Parser
+ * @addtogroup Parser
*/
/**
*/
/**
@@
-316,7
+315,6
@@
$wgHtmlEntities = array(
'zwj' => 8205,
'zwnj' => 8204 );
'zwj' => 8205,
'zwnj' => 8204 );
-/** @package MediaWiki */
class Sanitizer {
/**
* Cleans up HTML, removes dangerous tags and attributes, and
class Sanitizer {
/**
* Cleans up HTML, removes dangerous tags and attributes, and
@@
-330,11
+328,11
@@
class Sanitizer {
static function removeHTMLtags( $text, $processCallback = null, $args = array() ) {
global $wgUseTidy, $wgUserHtml;
static function removeHTMLtags( $text, $processCallback = null, $args = array() ) {
global $wgUseTidy, $wgUserHtml;
- static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
+ static $htmlpairs, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
$htmllist, $listtags, $htmlsingleallowed, $htmlelements, $staticInitialised;
-
+
wfProfileIn( __METHOD__ );
wfProfileIn( __METHOD__ );
-
+
if ( !$staticInitialised ) {
if( $wgUserHtml ) {
$htmlpairs = array( # Tags that must be closed
if ( !$staticInitialised ) {
if( $wgUserHtml ) {
$htmlpairs = array( # Tags that must be closed
@@
-354,7
+352,7
@@
class Sanitizer {
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
);
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
);
- $tabletags = array( # Can only appear inside table
+ $tabletags = array( # Can only appear inside table
, we will close them
'td', 'th', 'tr',
);
$htmllist = array( # Tags used by list
'td', 'th', 'tr',
);
$htmllist = array( # Tags used by list
@@
-386,14
+384,16
@@
class Sanitizer {
# Remove HTML comments
$text = Sanitizer::removeHTMLcomments( $text );
$bits = explode( '<', $text );
# Remove HTML comments
$text = Sanitizer::removeHTMLcomments( $text );
$bits = explode( '<', $text );
- $text =
array_shift( $bits
);
+ $text =
str_replace( '>', '>', array_shift( $bits )
);
if(!$wgUseTidy) {
$tagstack = $tablestack = array();
foreach ( $bits as $x ) {
if(!$wgUseTidy) {
$tagstack = $tablestack = array();
foreach ( $bits as $x ) {
- $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
- preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs );
- list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
- error_reporting( $prev );
+ $regs = array();
+ if( preg_match( '!^(/?)(\\w+)([^>]*?)(/{0,1}>)([^<]*)$!', $x, $regs ) ) {
+ list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
+ } else {
+ $slash = $t = $params = $brace = $rest = null;
+ }
$badtag = 0 ;
if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
$badtag = 0 ;
if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
@@
-451,6
+451,10
@@
class Sanitizer {
} else if( isset( $htmlsingle[$t] ) ) {
# Hack to not close $htmlsingle tags
$brace = NULL;
} else if( isset( $htmlsingle[$t] ) ) {
# Hack to not close $htmlsingle tags
$brace = NULL;
+ } else if( isset( $tabletags[$t] )
+ && in_array($t ,$tagstack) ) {
+ // New table tag but forgot to close the previous one
+ $text .= "</$t>";
} else {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
} else {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
@@
-470,7
+474,7
@@
class Sanitizer {
}
if ( ! $badtag ) {
$rest = str_replace( '>', '>', $rest );
}
if ( ! $badtag ) {
$rest = str_replace( '>', '>', $rest );
- $close = ( $brace == '/>' ) ? ' /' : '';
+ $close = ( $brace == '/>'
&& !$slash
) ? ' /' : '';
$text .= "<$slash$t$newparams$close>$rest";
continue;
}
$text .= "<$slash$t$newparams$close>$rest";
continue;
}
@@
-487,7
+491,7
@@
class Sanitizer {
foreach ( $bits as $x ) {
preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
foreach ( $bits as $x ) {
preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
$x, $regs );
- @list(
$qbar
, $slash, $t, $params, $brace, $rest ) = $regs;
+ @list(
/* $qbar */
, $slash, $t, $params, $brace, $rest ) = $regs;
if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
if( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, array( &$params, $args ) );
if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
if( is_callable( $processCallback ) ) {
call_user_func_array( $processCallback, array( &$params, $args ) );
@@
-603,7
+607,7
@@
class Sanitizer {
$stripped = Sanitizer::decodeCharReferences( $value );
// Remove any comments; IE gets token splitting wrong
$stripped = Sanitizer::decodeCharReferences( $value );
// Remove any comments; IE gets token splitting wrong
- $stripped = StringUtils::delimiterReplace( '/
\*', '\
*/', ' ', $stripped );
+ $stripped = StringUtils::delimiterReplace( '/
*', '
*/', ' ', $stripped );
$value = $stripped;
$value = $stripped;
@@
-643,15
+647,15
@@
class Sanitizer {
if( trim( $text ) == '' ) {
return '';
}
if( trim( $text ) == '' ) {
return '';
}
-
+
$stripped = Sanitizer::validateTagAttributes(
Sanitizer::decodeTagAttributes( $text ), $element );
$stripped = Sanitizer::validateTagAttributes(
Sanitizer::decodeTagAttributes( $text ), $element );
-
+
$attribs = array();
foreach( $stripped as $attribute => $value ) {
$encAttribute = htmlspecialchars( $attribute );
$encValue = Sanitizer::safeEncodeAttribute( $value );
$attribs = array();
foreach( $stripped as $attribute => $value ) {
$encAttribute = htmlspecialchars( $attribute );
$encValue = Sanitizer::safeEncodeAttribute( $value );
-
+
$attribs[] = "$encAttribute=\"$encValue\"";
}
return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
$attribs[] = "$encAttribute=\"$encValue\"";
}
return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
@@
-664,7
+668,7
@@
class Sanitizer {
*/
static function encodeAttribute( $text ) {
$encValue = htmlspecialchars( $text );
*/
static function encodeAttribute( $text ) {
$encValue = htmlspecialchars( $text );
-
+
// Whitespace is normalized during attribute decoding,
// so if we've been passed non-spaces we must encode them
// ahead of time or they won't be preserved.
// Whitespace is normalized during attribute decoding,
// so if we've been passed non-spaces we must encode them
// ahead of time or they won't be preserved.
@@
-673,10
+677,10
@@
class Sanitizer {
"\r" => ' ',
"\t" => '	',
) );
"\r" => ' ',
"\t" => '	',
) );
-
+
return $encValue;
}
return $encValue;
}
-
+
/**
* Encode an attribute value for HTML tags, with extra armoring
* against further wiki processing.
/**
* Encode an attribute value for HTML tags, with extra armoring
* against further wiki processing.
@@
-685,7
+689,7
@@
class Sanitizer {
*/
static function safeEncodeAttribute( $text ) {
$encValue = Sanitizer::encodeAttribute( $text );
*/
static function safeEncodeAttribute( $text ) {
$encValue = Sanitizer::encodeAttribute( $text );
-
+
# Templates and links may be expanded in later parsing,
# creating invalid or dangerous output. Suppress this.
$encValue = strtr( $encValue, array(
# Templates and links may be expanded in later parsing,
# creating invalid or dangerous output. Suppress this.
$encValue = strtr( $encValue, array(
@@
-741,7
+745,7
@@
class Sanitizer {
* Given a value, escape it so that it can be used as a CSS class and
* return it.
*
* Given a value, escape it so that it can be used as a CSS class and
* return it.
*
- *
TODO:
For extra validity, input should be validated UTF-8.
+ *
@todo
For extra validity, input should be validated UTF-8.
*
* @link http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
*
*
* @link http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
*
@@
-793,11
+797,11
@@
class Sanitizer {
foreach( $pairs as $set ) {
$attribute = strtolower( $set[1] );
$value = Sanitizer::getTagAttributeCallback( $set );
foreach( $pairs as $set ) {
$attribute = strtolower( $set[1] );
$value = Sanitizer::getTagAttributeCallback( $set );
-
+
// Normalize whitespace
$value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
$value = trim( $value );
// Normalize whitespace
$value = preg_replace( '/[\t\r\n ]+/', ' ', $value );
$value = trim( $value );
-
+
// Decode character references
$attribs[$attribute] = Sanitizer::decodeCharReferences( $value );
}
// Decode character references
$attribs[$attribute] = Sanitizer::decodeCharReferences( $value );
}
@@
-1213,7
+1217,7
@@
class Sanitizer {
$out .= "]>\n";
return $out;
}
$out .= "]>\n";
return $out;
}
-
+
static function cleanUrl( $url, $hostname=true ) {
# Normalize any HTML entities in input. They will be
# re-escaped by makeExternalLink().
static function cleanUrl( $url, $hostname=true ) {
# Normalize any HTML entities in input. They will be
# re-escaped by makeExternalLink().
@@
-1221,11
+1225,12
@@
class Sanitizer {
# Escape any control characters introduced by the above step
$url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );
# Escape any control characters introduced by the above step
$url = preg_replace( '/[\][<>"\\x00-\\x20\\x7F]/e', "urlencode('\\0')", $url );
-
+
# Validate hostname portion
# Validate hostname portion
+ $matches = array();
if( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
if( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
- list(
$whole
, $protocol, $host, $rest ) = $matches;
-
+ list(
/* $whole */
, $protocol, $host, $rest ) = $matches;
+
// Characters that will be ignored in IDNs.
// http://tools.ietf.org/html/3454#section-3.1
// Strip them before further processing so blacklists and such work.
// Characters that will be ignored in IDNs.
// http://tools.ietf.org/html/3454#section-3.1
// Strip them before further processing so blacklists and such work.
@@
-1244,11
+1249,11
@@
class Sanitizer {
\xe2\x80\x8d| # 200d ZERO WIDTH JOINER
[\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe00f VARIATION SELECTOR-1-16
/xuD";
\xe2\x80\x8d| # 200d ZERO WIDTH JOINER
[\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe00f VARIATION SELECTOR-1-16
/xuD";
-
+
$host = preg_replace( $strip, '', $host );
$host = preg_replace( $strip, '', $host );
-
+
// @fixme: validate hostnames here
// @fixme: validate hostnames here
-
+
return $protocol . $host . $rest;
} else {
return $url;
return $protocol . $host . $rest;
} else {
return $url;