}
/**
- * Cleans up HTML, removes dangerous tags and attributes, and
- * removes HTML comments
- * @param string $text
- * @param callable $processCallback Callback to do any variable or parameter
- * replacements in HTML attribute values
- * @param array|bool $args Arguments for the processing callback
+ * Return the various lists of recognized tags
* @param array $extratags For any extra tags to include
* @param array $removetags For any tags (default or extra) to exclude
- * @return string
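+ * @return array Associative array with the keys 'htmlpairs', 'htmlsingle',
+ *  'htmlsingleonly', 'htmlnest', 'tabletags', 'htmllist', 'listtags',
+ *  'htmlsingleallowed' and 'htmlelements'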
*/
- public static function removeHTMLtags( $text, $processCallback = null,
- $args = array(), $extratags = array(), $removetags = array()
- ) {
- global $wgUseTidy, $wgAllowMicrodataAttributes, $wgAllowImageTag;
+ public static function getRecognizedTagData( $extratags = array(), $removetags = array() ) {
+ global $wgAllowMicrodataAttributes, $wgAllowImageTag;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
}
$staticInitialised = $globalContext;
}
+
# Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays
$extratags = array_flip( $extratags );
$removetags = array_flip( $removetags );
$htmlpairs = array_merge( $extratags, $htmlpairsStatic );
$htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );
+ return array(
+ 'htmlpairs' => $htmlpairs,
+ 'htmlsingle' => $htmlsingle,
+ 'htmlsingleonly' => $htmlsingleonly,
+ 'htmlnest' => $htmlnest,
+ 'tabletags' => $tabletags,
+ 'htmllist' => $htmllist,
+ 'listtags' => $listtags,
+ 'htmlsingleallowed' => $htmlsingleallowed,
+ 'htmlelements' => $htmlelements,
+ );
+ }
+
+ /**
+ * Cleans up HTML, removes dangerous tags and attributes, and
+ * removes HTML comments
+ * @param string $text
+ * @param callable $processCallback Callback to do any variable or parameter
+ * replacements in HTML attribute values
+ * @param array|bool $args Arguments for the processing callback
+ * @param array $extratags For any extra tags to include
+ * @param array $removetags For any tags (default or extra) to exclude
+ * @return string
+ */
+ public static function removeHTMLtags( $text, $processCallback = null,
+ $args = array(), $extratags = array(), $removetags = array()
+ ) {
+ global $wgUseTidy;
+
+ extract( self::getRecognizedTagData( $extratags, $removetags ) );
+
# Remove HTML comments
$text = Sanitizer::removeHTMLcomments( $text );
$bits = explode( '<', $text );
$badtag = true;
} elseif ( $slash ) {
# Closing a tag... is it the one we just opened?
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$ot = array_pop( $tagstack );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
if ( $ot != $t ) {
if ( isset( $htmlsingleallowed[$ot] ) ) {
# and see if we find a match below them
$optstack = array();
array_push( $optstack, $ot );
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$ot = array_pop( $tagstack );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) {
array_push( $optstack, $ot );
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$ot = array_pop( $tagstack );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
}
if ( $t != $ot ) {
# No match. Push the optional elements back again
$badtag = true;
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$ot = array_pop( $optstack );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
while ( $ot ) {
array_push( $tagstack, $ot );
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
$ot = array_pop( $optstack );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
}
}
} else {
- wfSuppressWarnings();
+ MediaWiki\suppressWarnings();
array_push( $tagstack, $ot );
- wfRestoreWarnings();
+ MediaWiki\restoreWarnings();
# <li> can be nested in <ul> or <ol>, skip those cases:
if ( !isset( $htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
} else {
# this might be possible using tidy itself
foreach ( $bits as $x ) {
- preg_match( self::ELEMENT_BITS_REGEX, $x, $regs );
-
- wfSuppressWarnings();
- list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
- wfRestoreWarnings();
+ if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
+ list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
- $badtag = false;
- if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
- if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
- }
+ $badtag = false;
+ if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
+ if ( is_callable( $processCallback ) ) {
+ call_user_func_array( $processCallback, array( &$params, $args ) );
+ }
- if ( !Sanitizer::validateTag( $params, $t ) ) {
- $badtag = true;
- }
+ if ( !Sanitizer::validateTag( $params, $t ) ) {
+ $badtag = true;
+ }
- $newparams = Sanitizer::fixTagAttributes( $params, $t );
- if ( !$badtag ) {
- $rest = str_replace( '>', '>', $rest );
- $text .= "<$slash$t$newparams$brace$rest";
- continue;
+ $newparams = Sanitizer::fixTagAttributes( $params, $t );
+ if ( !$badtag ) {
+ $rest = str_replace( '>', '>', $rest );
+ $text .= "<$slash$t$newparams$brace$rest";
+ continue;
+ }
}
}
$text .= '<' . str_replace( '>', '>', $x );
$value = preg_replace_callback(
'/[!-[]-z]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
function ( $matches ) {
- $cp = utf8ToCodepoint( $matches[0] );
+ $cp = UtfNormal\Utils::utf8ToCodepoint( $matches[0] );
if ( $cp === false ) {
return '';
}
// Line continuation
return '';
} elseif ( $matches[2] !== '' ) {
- $char = codepointToUtf8( hexdec( $matches[2] ) );
+ $char = UtfNormal\Utils::codepointToUtf8( hexdec( $matches[2] ) );
} elseif ( $matches[3] !== '' ) {
$char = $matches[3];
} else {
*/
static function decodeChar( $codepoint ) {
// Only codepoints accepted by Sanitizer::validateCodepoint() are converted
// to their UTF-8 encoding.
if ( Sanitizer::validateCodepoint( $codepoint ) ) {
- return codepointToUtf8( $codepoint );
+ return UtfNormal\Utils::codepointToUtf8( $codepoint );
} else {
// A rejected codepoint is not encoded; the UTF-8 replacement constant is
// returned in its place.
- return UTF8_REPLACEMENT;
+ return UtfNormal\Constants::UTF8_REPLACEMENT;
}
}
$name = self::$htmlEntityAliases[$name];
}
if ( isset( self::$htmlEntities[$name] ) ) {
- return codepointToUtf8( self::$htmlEntities[$name] );
+ return UtfNormal\Utils::codepointToUtf8( self::$htmlEntities[$name] );
} else {
return "&$name;";
}