# Allow any attribute beginning with "data-"
# However:
- # * data-ooui is reserved for ooui
- # * data-mw and data-parsoid are reserved for parsoid
- # * data-mw-<name here> is reserved for extensions (or core) if
- # they need to communicate some data to the client and want to be
- # sure that it isn't coming from an untrusted user.
+ # * Disallow data attributes used by MediaWiki code
# * Ensure that the attribute is not namespaced by banning
# colons.
- if ( !preg_match( '/^data-(?!ooui|mw|parsoid)[^:]*$/i', $attribute )
+ if ( !preg_match( '/^data-[^:]*$/i', $attribute )
&& !isset( $whitelist[$attribute] )
+ || self::isReservedDataAttribute( $attribute )
) {
continue;
}
# Strip javascript "expression" from stylesheets.
- # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
+ # https://msdn.microsoft.com/en-us/library/ms537634.aspx
if ( $attribute == 'style' ) {
$value = Sanitizer::checkCss( $value );
}
# NOTE: even though elements using href/src are not allowed directly, supply
# validation code that can be used by tag hook handlers, etc
- if ( $attribute === 'href' || $attribute === 'src' ) {
+ if ( $attribute === 'href' || $attribute === 'src' || $attribute === 'poster' ) {
if ( !preg_match( $hrefExp, $value ) ) {
continue; // drop any href or src attributes not using an allowed protocol.
// NOTE: this also drops all relative URLs
return $out;
}
+ /**
+ * Tell whether an attribute name lies in the part of the data-* space that
+ * MediaWiki keeps for itself (for example data-mw-foo). User-generated HTML
+ * is not allowed to set such attributes, so core and extension code can use
+ * them to pass data to frontend code without it being spoofed.
+ * @param string $attr Attribute name.
+ * @return bool True when the name starts with a reserved data- prefix.
+ */
+ public static function isReservedDataAttribute( $attr ) {
+ // Reserved prefixes:
+ // - data-ooui: ooui.
+ // - data-mw and data-parsoid: parsoid.
+ // - data-mw-<name here>: extensions (or core) that need to hand data
+ // to the client and be sure it did not come from an untrusted user.
+ // The check is a case-insensitive prefix match, so any name that merely
+ // begins with one of these prefixes is treated as reserved.
+ // Namespaced names are not considered here, since user-generated HTML
+ // can't use them anymore.
+ $reservedPrefix = '/^data-(ooui|mw|parsoid)/i';
+ return preg_match( $reservedPrefix, $attr ) === 1;
+ }
+
/**
* Merge two sets of HTML attributes. Conflicting items in the second set
* will override those in the first, except for 'class' attributes which
];
$id = urlencode( strtr( $id, ' ', '_' ) );
- $id = str_replace( array_keys( $replace ), array_values( $replace ), $id );
+ $id = strtr( $id, $replace );
if ( !preg_match( '/^[a-zA-Z]/', $id ) && !in_array( 'noninitial', $options ) ) {
// Initial character must be a letter!
# true
'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ),
+ 'video' => array_merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ),
+ 'source' => array_merge( $common, [ 'type', 'src' ] ),
+ 'track' => array_merge( $common, [ 'type', 'src', 'srclang', 'kind', 'label' ] ),
+
# 15.2.1
'tt' => $common,
'b' => $common,