* http://www.w3.org/TR/html4/sgml/entities.html
* As well as &apos; which is only defined starting in XHTML1.
*/
- private static $htmlEntities = array(
+ private static $htmlEntities = [
'Aacute' => 193,
'aacute' => 225,
'Acirc' => 194,
'zeta' => 950,
'zwj' => 8205,
'zwnj' => 8204
- );
+ ];
/**
* Character entity aliases accepted by MediaWiki
*/
- private static $htmlEntityAliases = array(
+ private static $htmlEntityAliases = [
'רלמ' => 'rlm',
'رلم' => 'rlm',
- );
+ ];
/**
* Lazy-initialised attributes regex, see getAttribsRegex()
/**
* Regular expression to match HTML/XML attribute pairs within a tag.
- * Allows some... latitude.
+ * Allows some... latitude. Based on,
+ * http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
* @return string
*/
if ( self::$attribsRegex === null ) {
$attribFirst = '[:A-Z_a-z0-9]';
$attrib = '[:A-Z_a-z-.0-9]';
- $space = '[\x09\x0a\x0d\x20]';
+ $space = '[\x09\x0a\x0c\x0d\x20]';
self::$attribsRegex =
"/(?:^|$space)({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
- \"([^<\"]*)(?:\"|\$)
- | '([^<']*)(?:'|\$)
- | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
+ \"([^\"]*)(?:\"|\$)
+ | '([^']*)(?:'|\$)
+ | (((?!$space|>).)*)
)
)?(?=$space|\$)/sx";
}
* @param array $removetags For any tags (default or extra) to exclude
* @return array
*/
- public static function getRecognizedTagData( $extratags = array(), $removetags = array() ) {
- global $wgAllowMicrodataAttributes, $wgAllowImageTag;
+ public static function getRecognizedTagData( $extratags = [], $removetags = [] ) {
+ global $wgAllowImageTag;
static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
// Base our staticInitialised variable off of the global config state so that if the globals
// are changed (like in the screwed up test system) we will re-initialise the settings.
- $globalContext = implode( '-', compact( 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) );
+ $globalContext = $wgAllowImageTag;
if ( !$staticInitialised || $staticInitialised != $globalContext ) {
- $htmlpairsStatic = array( # Tags that must be closed
+ $htmlpairsStatic = [ # Tags that must be closed
'b', 'bdi', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
'ruby', 'rb', 'rp', 'rt', 'rtc', 'p', 'span', 'abbr', 'dfn',
'kbd', 'samp', 'data', 'time', 'mark'
- );
- $htmlsingle = array(
+ ];
+ $htmlsingle = [
'br', 'wbr', 'hr', 'li', 'dt', 'dd'
- );
- $htmlsingleonly = array( # Elements that cannot have close tags
+ ];
+ $htmlsingleonly = [ # Elements that cannot have close tags
'br', 'wbr', 'hr'
- );
- if ( $wgAllowMicrodataAttributes ) {
- $htmlsingle[] = $htmlsingleonly[] = 'meta';
- $htmlsingle[] = $htmlsingleonly[] = 'link';
- }
- $htmlnest = array( # Tags that can be nested--??
+ ];
+
+ $htmlsingle[] = $htmlsingleonly[] = 'meta';
+ $htmlsingle[] = $htmlsingleonly[] = 'link';
+
+ $htmlnest = [ # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'li', 'dl', 'dt', 'dd', 'font', 'big', 'small', 'sub', 'sup', 'span',
'var', 'kbd', 'samp', 'em', 'strong', 'q', 'ruby', 'bdo'
- );
- $tabletags = array( # Can only appear inside table, we will close them
+ ];
+ $tabletags = [ # Can only appear inside table, we will close them
'td', 'th', 'tr',
- );
- $htmllist = array( # Tags used by list
+ ];
+ $htmllist = [ # Tags used by list
'ul', 'ol',
- );
- $listtags = array( # Tags that can appear in a list
+ ];
+ $listtags = [ # Tags that can appear in a list
'li',
- );
+ ];
if ( $wgAllowImageTag ) {
$htmlsingle[] = 'img';
$htmlelementsStatic = array_unique( array_merge( $htmlsingle, $htmlpairsStatic, $htmlnest ) );
# Convert them all to hashtables for faster lookup
- $vars = array( 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
- 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' );
+ $vars = [ 'htmlpairsStatic', 'htmlsingle', 'htmlsingleonly', 'htmlnest', 'tabletags',
+ 'htmllist', 'listtags', 'htmlsingleallowed', 'htmlelementsStatic' ];
foreach ( $vars as $var ) {
$$var = array_flip( $$var );
}
$htmlpairs = array_merge( $extratags, $htmlpairsStatic );
$htmlelements = array_diff_key( array_merge( $extratags, $htmlelementsStatic ), $removetags );
- return array(
+ return [
'htmlpairs' => $htmlpairs,
'htmlsingle' => $htmlsingle,
'htmlsingleonly' => $htmlsingleonly,
'listtags' => $listtags,
'htmlsingleallowed' => $htmlsingleallowed,
'htmlelements' => $htmlelements,
- );
+ ];
}
/**
* @return string
*/
public static function removeHTMLtags( $text, $processCallback = null,
- $args = array(), $extratags = array(), $removetags = array()
+ $args = [], $extratags = [], $removetags = []
) {
extract( self::getRecognizedTagData( $extratags, $removetags ) );
$bits = explode( '<', $text );
$text = str_replace( '>', '&gt;', array_shift( $bits ) );
if ( !MWTidy::isEnabled() ) {
- $tagstack = $tablestack = array();
+ $tagstack = $tablestack = [];
foreach ( $bits as $x ) {
- $regs = array();
+ $regs = [];
# $slash: Does the current element start with a '/'?
# $t: Current element name
# $params: String between element name and >
if ( isset( $htmlsingleallowed[$ot] ) ) {
# Pop all elements with an optional close tag
# and see if we find a match below them
- $optstack = array();
+ $optstack = [];
array_push( $optstack, $ot );
MediaWiki\suppressWarnings();
$ot = array_pop( $tagstack );
} else {
if ( $t == 'table' ) {
array_push( $tablestack, $tagstack );
- $tagstack = array();
+ $tagstack = [];
}
array_push( $tagstack, $t );
}
# Replace any variables or template parameters with
# plaintext results.
if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
+ call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( !Sanitizer::validateTag( $params, $t ) ) {
$t = strtolower( $t );
if ( isset( $htmlelements[$t] ) ) {
if ( is_callable( $processCallback ) ) {
- call_user_func_array( $processCallback, array( &$params, $args ) );
+ call_user_func_array( $processCallback, [ &$params, $args ] );
}
if ( !Sanitizer::validateTag( $params, $t ) ) {
* @todo Check for unique id attribute :P
*/
static function validateAttributes( $attribs, $whitelist ) {
- global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
-
$whitelist = array_flip( $whitelist );
$hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/';
- $out = array();
+ $out = [];
foreach ( $attribs as $attribute => $value ) {
- # allow XML namespace declaration if RDFa is enabled
- if ( $wgAllowRdfaAttributes && preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
+ # Allow XML namespace declaration to allow RDFa
+ if ( preg_match( self::XMLNS_ATTRIBUTE_PATTERN, $attribute ) ) {
if ( !preg_match( self::EVIL_URI_PATTERN, $value ) ) {
$out[$attribute] = $value;
}
$out[$attribute] = $value;
}
- if ( $wgAllowMicrodataAttributes ) {
- # itemtype, itemid, itemref don't make sense without itemscope
- if ( !array_key_exists( 'itemscope', $out ) ) {
- unset( $out['itemtype'] );
- unset( $out['itemid'] );
- unset( $out['itemref'] );
- }
- # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
+ # itemtype, itemid, itemref don't make sense without itemscope
+ if ( !array_key_exists( 'itemscope', $out ) ) {
+ unset( $out['itemtype'] );
+ unset( $out['itemid'] );
+ unset( $out['itemref'] );
}
+ # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
+
return $out;
}
)/xu";
}
$value = preg_replace_callback( $decodeRegex,
- array( __CLASS__, 'cssDecodeCallback' ), $value );
+ [ __CLASS__, 'cssDecodeCallback' ], $value );
// Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
$value = preg_replace_callback(
// Convert more characters IE6 might treat as ascii
// U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
$value = str_replace(
- array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
- array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
+ [ 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ],
+ [ 'r', 'n', 'n', 'l', 'i', '(', '(' ],
$value
);
// Whitespace is normalized during attribute decoding,
// so if we've been passed non-spaces we must encode them
// ahead of time or they won't be preserved.
- $encValue = strtr( $encValue, array(
+ $encValue = strtr( $encValue, [
"\n" => '&#10;',
"\r" => '&#13;',
"\t" => '&#9;',
- ) );
+ ] );
return $encValue;
}
# Templates and links may be expanded in later parsing,
# creating invalid or dangerous output. Suppress this.
- $encValue = strtr( $encValue, array(
+ $encValue = strtr( $encValue, [
'<' => '&lt;', // This should never happen,
'>' => '&gt;', // we've received invalid input
'"' => '&quot;', // which should have been escaped.
'PMID' => '&#80;MID',
'|' => '&#124;',
'__' => '&#95;_',
- ) );
+ ] );
# Stupid hack
$encValue = preg_replace_callback(
'/((?i)' . wfUrlProtocols() . ')/',
- array( 'Sanitizer', 'armorLinksCallback' ),
+ [ 'Sanitizer', 'armorLinksCallback' ],
$encValue );
return $encValue;
}
* anchors and links won't break.
* @return string
*/
- static function escapeId( $id, $options = array() ) {
+ static function escapeId( $id, $options = [] ) {
global $wgExperimentalHtmlIds;
$options = (array)$options;
}
// HTML4-style escaping
- static $replace = array(
+ static $replace = [
'%3A' => ':',
'%' => '.'
- );
+ ];
$id = urlencode( strtr( $id, ' ', '_' ) );
$id = str_replace( array_keys( $replace ), array_values( $replace ), $id );
* anchors and links won't break.
* @return string
*/
- static function escapeIdReferenceList( $referenceString, $options = array() ) {
+ static function escapeIdReferenceList( $referenceString, $options = [] ) {
# Explode the space delimited list string into an array of tokens
$references = preg_split( '/\s+/', "{$referenceString}", -1, PREG_SPLIT_NO_EMPTY );
static function escapeClass( $class ) {
// Convert ugly stuff to underscores and kill underscores in ugly places
return rtrim( preg_replace(
- array( '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/', '/_+/' ),
+ [ '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/', '/_+/' ],
'_',
$class ), '_' );
}
*/
public static function decodeTagAttributes( $text ) {
if ( trim( $text ) == '' ) {
- return array();
+ return [];
}
- $attribs = array();
- $pairs = array();
+ $attribs = [];
+ $pairs = [];
if ( !preg_match_all(
self::getAttribsRegex(),
$text,
* @return string
*/
public static function safeEncodeTagAttributes( $assoc_array ) {
- $attribs = array();
+ $attribs = [];
foreach ( $assoc_array as $attribute => $value ) {
$encAttribute = htmlspecialchars( $attribute );
$encValue = Sanitizer::safeEncodeAttribute( $value );
static function normalizeCharReferences( $text ) {
return preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
+ [ 'Sanitizer', 'normalizeCharReferencesCallback' ],
$text );
}
static function normalizeEntity( $name ) {
if ( isset( self::$htmlEntityAliases[$name] ) ) {
return '&' . self::$htmlEntityAliases[$name] . ';';
- } elseif ( in_array( $name, array( 'lt', 'gt', 'amp', 'quot' ) ) ) {
+ } elseif ( in_array( $name, [ 'lt', 'gt', 'amp', 'quot' ] ) ) {
return "&$name;";
} elseif ( isset( self::$htmlEntities[$name] ) ) {
return '&#' . self::$htmlEntities[$name] . ';';
public static function decodeCharReferences( $text ) {
return preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'decodeCharReferencesCallback' ),
+ [ 'Sanitizer', 'decodeCharReferencesCallback' ],
$text );
}
global $wgContLang;
$text = preg_replace_callback(
self::CHAR_REFS_REGEX,
- array( 'Sanitizer', 'decodeCharReferencesCallback' ),
+ [ 'Sanitizer', 'decodeCharReferencesCallback' ],
$text, /* limit */ -1, $count );
if ( $count ) {
$list = Sanitizer::setupAttributeWhitelist();
return isset( $list[$element] )
? $list[$element]
- : array();
+ : [];
}
/**
* @return array
*/
static function setupAttributeWhitelist() {
- global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
- static $whitelist, $staticInitialised;
+ static $whitelist;
- $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) );
-
- if ( $whitelist !== null && $staticInitialised == $globalContext ) {
+ if ( $whitelist !== null ) {
return $whitelist;
}
- $common = array(
+ $common = [
# HTML
'id',
'class',
'aria-labelledby',
'aria-owns',
'role',
- );
- if ( $wgAllowRdfaAttributes ) {
- # RDFa attributes as specified in section 9 of
+ # RDFa
+ # These attributes are specified in section 9 of
# http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
- $common = array_merge( $common, array(
- 'about', 'property', 'resource', 'datatype', 'typeof',
- ) );
- }
+ 'about',
+ 'property',
+ 'resource',
+ 'datatype',
+ 'typeof',
- if ( $wgAllowMicrodataAttributes ) {
- # add HTML5 microdata tags as specified by
+ # Microdata. These are specified by
# http://www.whatwg.org/html/microdata.html#the-microdata-model
- $common = array_merge( $common, array(
- 'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
- ) );
- }
-
- $block = array_merge( $common, array( 'align' ) );
- $tablealign = array( 'align', 'valign' );
- $tablecell = array(
+ 'itemid',
+ 'itemprop',
+ 'itemref',
+ 'itemscope',
+ 'itemtype',
+ ];
+
+ $block = array_merge( $common, [ 'align' ] );
+ $tablealign = [ 'align', 'valign' ];
+ $tablecell = [
'abbr',
'axis',
'headers',
'width', # deprecated
'height', # deprecated
'bgcolor', # deprecated
- );
+ ];
# Numbers refer to sections in HTML 4.01 standard describing the element.
# See: http://www.w3.org/TR/html4/
- $whitelist = array(
+ $whitelist = [
# 7.5.4
'div' => $block,
'center' => $common, # deprecated
# acronym
# 9.2.2
- 'blockquote' => array_merge( $common, array( 'cite' ) ),
- 'q' => array_merge( $common, array( 'cite' ) ),
+ 'blockquote' => array_merge( $common, [ 'cite' ] ),
+ 'q' => array_merge( $common, [ 'cite' ] ),
# 9.2.3
'sub' => $common,
'p' => $block,
# 9.3.2
- 'br' => array_merge( $common, array( 'clear' ) ),
+ 'br' => array_merge( $common, [ 'clear' ] ),
# http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
'wbr' => $common,
# 9.3.4
- 'pre' => array_merge( $common, array( 'width' ) ),
+ 'pre' => array_merge( $common, [ 'width' ] ),
# 9.4
- 'ins' => array_merge( $common, array( 'cite', 'datetime' ) ),
- 'del' => array_merge( $common, array( 'cite', 'datetime' ) ),
+ 'ins' => array_merge( $common, [ 'cite', 'datetime' ] ),
+ 'del' => array_merge( $common, [ 'cite', 'datetime' ] ),
# 10.2
- 'ul' => array_merge( $common, array( 'type' ) ),
- 'ol' => array_merge( $common, array( 'type', 'start', 'reversed' ) ),
- 'li' => array_merge( $common, array( 'type', 'value' ) ),
+ 'ul' => array_merge( $common, [ 'type' ] ),
+ 'ol' => array_merge( $common, [ 'type', 'start', 'reversed' ] ),
+ 'li' => array_merge( $common, [ 'type', 'value' ] ),
# 10.3
'dl' => $common,
# 11.2.1
'table' => array_merge( $common,
- array( 'summary', 'width', 'border', 'frame',
+ [ 'summary', 'width', 'border', 'frame',
'rules', 'cellspacing', 'cellpadding',
'align', 'bgcolor',
- ) ),
+ ] ),
# 11.2.2
'caption' => $block,
'tbody' => $common,
# 11.2.4
- 'colgroup' => array_merge( $common, array( 'span' ) ),
- 'col' => array_merge( $common, array( 'span' ) ),
+ 'colgroup' => array_merge( $common, [ 'span' ] ),
+ 'col' => array_merge( $common, [ 'span' ] ),
# 11.2.5
- 'tr' => array_merge( $common, array( 'bgcolor' ), $tablealign ),
+ 'tr' => array_merge( $common, [ 'bgcolor' ], $tablealign ),
# 11.2.6
'td' => array_merge( $common, $tablecell, $tablealign ),
# 12.2
# NOTE: <a> is not allowed directly, but the attrib
# whitelist is used from the Parser object
- 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa
+ 'a' => array_merge( $common, [ 'href', 'rel', 'rev' ] ), # rel/rev esp. for RDFa
# 13.2
# Not usually allowed, but may be used for extension-style hooks
# such as <math> when it is rasterized, or if $wgAllowImageTag is
# true
- 'img' => array_merge( $common, array( 'alt', 'src', 'width', 'height' ) ),
+ 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ),
# 15.2.1
'tt' => $common,
'u' => $common,
# 15.2.2
- 'font' => array_merge( $common, array( 'size', 'color', 'face' ) ),
+ 'font' => array_merge( $common, [ 'size', 'color', 'face' ] ),
# basefont
# 15.3
- 'hr' => array_merge( $common, array( 'width' ) ),
+ 'hr' => array_merge( $common, [ 'width' ] ),
# HTML Ruby annotation text module, simple ruby only.
# http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
# MathML root element, where used for extensions
# 'title' may not be 100% valid here; it's XHTML
# http://www.w3.org/TR/REC-MathML/
- 'math' => array( 'class', 'style', 'id', 'title' ),
+ 'math' => [ 'class', 'style', 'id', 'title' ],
# HTML 5 section 4.6
'bdi' => $common,
# HTML5 elements, defined by:
# http://www.whatwg.org/html/
- 'data' => array_merge( $common, array( 'value' ) ),
- 'time' => array_merge( $common, array( 'datetime' ) ),
+ 'data' => array_merge( $common, [ 'value' ] ),
+ 'time' => array_merge( $common, [ 'datetime' ] ),
'mark' => $common,
// meta and link are only permitted by removeHTMLtags when Microdata
// Also meta and link are only valid in WikiText as Microdata elements
// (ie: validateTag rejects tags missing the attributes needed for Microdata)
// So we don't bother including $common attributes that have no purpose.
- 'meta' => array( 'itemprop', 'content' ),
- 'link' => array( 'itemprop', 'href' ),
- );
-
- $staticInitialised = $globalContext;
+ 'meta' => [ 'itemprop', 'content' ],
+ 'link' => [ 'itemprop', 'href' ],
+ ];
return $whitelist;
}
# Escape any control characters introduced by the above step
$url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F\|]/',
- array( __CLASS__, 'cleanUrlCallback' ), $url );
+ [ __CLASS__, 'cleanUrlCallback' ], $url );
# Validate hostname portion
- $matches = array();
+ $matches = [];
if ( preg_match( '!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches ) ) {
list( /* $whole */, $protocol, $host, $rest ) = $matches;
*/
public static function validateEmail( $addr ) {
$result = null;
- if ( !Hooks::run( 'isValidEmailAddr', array( $addr, &$result ) ) ) {
+ if ( !Hooks::run( 'isValidEmailAddr', [ $addr, &$result ] ) ) {
return $result;
}