/**
* Acceptable tag name charset from HTML5 parsing spec
- * http://dev.w3.org/html5/spec-preview/tokenization.html#tag-open-state
+ * http://www.w3.org/TR/html5/syntax.html#tag-open-state
*/
const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
} else {
# this might be possible using tidy itself
foreach ( $bits as $x ) {
- preg_match( self::ELEMENT_BITS_REGEX, $x, $regs );
-
- wfSuppressWarnings();
- list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
- wfRestoreWarnings();
+ if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
+ list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
- $badtag = false;
- if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
- if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
- }
+ $badtag = false;
+ if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
+ if ( is_callable( $processCallback ) ) {
+ call_user_func_array( $processCallback, array( &$params, $args ) );
+ }
- if ( !Sanitizer::validateTag( $params, $t ) ) {
- $badtag = true;
- }
+ if ( !Sanitizer::validateTag( $params, $t ) ) {
+ $badtag = true;
+ }
- $newparams = Sanitizer::fixTagAttributes( $params, $t );
- if ( !$badtag ) {
- $rest = str_replace( '>', '>', $rest );
- $text .= "<$slash$t$newparams$brace$rest";
- continue;
+ $newparams = Sanitizer::fixTagAttributes( $params, $t );
+ if ( !$badtag ) {
+ $rest = str_replace( '>', '>', $rest );
+ $text .= "<$slash$t$newparams$brace$rest";
+ continue;
+ }
}
}
$text .= '<' . str_replace( '>', '>', $x );
$value = preg_replace_callback(
'/[!-[]-z]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
function ( $matches ) {
- $cp = utf8ToCodepoint( $matches[0] );
+ $cp = UtfNormal\Utils::utf8ToCodepoint( $matches[0] );
if ( $cp === false ) {
return '';
}
// Line continuation
return '';
} elseif ( $matches[2] !== '' ) {
- $char = codepointToUtf8( hexdec( $matches[2] ) );
+ $char = UtfNormal\Utils::codepointToUtf8( hexdec( $matches[2] ) );
} elseif ( $matches[3] !== '' ) {
$char = $matches[3];
} else {
*/
static function decodeChar( $codepoint ) {
if ( Sanitizer::validateCodepoint( $codepoint ) ) {
- return codepointToUtf8( $codepoint );
+ return UtfNormal\Utils::codepointToUtf8( $codepoint );
} else {
- return UTF8_REPLACEMENT;
+ return UtfNormal\Constants::UTF8_REPLACEMENT;
}
}
$name = self::$htmlEntityAliases[$name];
}
if ( isset( self::$htmlEntities[$name] ) ) {
- return codepointToUtf8( self::$htmlEntities[$name] );
+ return UtfNormal\Utils::codepointToUtf8( self::$htmlEntities[$name] );
} else {
return "&$name;";
}