# WAI-ARIA
# http://www.w3.org/TR/wai-aria/
- # http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#wai-aria
+ # http://www.whatwg.org/html/elements.html#wai-aria
# For now we only support role="presentation" until we work out what roles should be
# usable by content and we ensure that our code explicitly rejects patterns that
# violate HTML5's ARIA restrictions.
/**
* Pick apart some CSS and check it for forbidden or unsafe structures.
* Returns a sanitized string. This sanitized string will have
- * character references and escape sequences decoded, and comments
- * stripped. If the input is just too evil, only a comment complaining
- * about evilness will be returned.
+ * character references and escape sequences decoded and comments
+ * stripped (unless the value is itself nothing but a single valid comment,
+ * in which case that comment is let through instead of being stripped).
+ * If the input is just too evil, only a comment complaining about evilness
+ * will be returned.
*
* Currently URL references, 'expression', 'tps' are forbidden.
*
$value = preg_replace_callback( $decodeRegex,
array( __CLASS__, 'cssDecodeCallback' ), $value );
- // Remove any comments; IE gets token splitting wrong
- // This must be done AFTER decoding character references and
- // escape sequences, because those steps can introduce comments
- // This step cannot introduce character references or escape
- // sequences, because it replaces comments with spaces rather
- // than removing them completely.
- $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
-
- // Remove anything after a comment-start token, to guard against
- // incorrect client implementations.
- $commentPos = strpos( $value, '/*' );
- if ( $commentPos !== false ) {
- $value = substr( $value, 0, $commentPos );
+ // Let the value through if it's nothing but a single comment, to
+ // allow other functions which may reject it to pass some error
+ // message through.
+ if ( !preg_match( '! ^ \s* /\* [^*\\/]* \*/ \s* $ !x', $value ) ) {
+ // Remove any comments; IE gets token splitting wrong
+ // This must be done AFTER decoding character references and
+ // escape sequences, because those steps can introduce comments
+ // This step cannot introduce character references or escape
+ // sequences, because it replaces comments with spaces rather
+ // than removing them completely.
+ $value = StringUtils::delimiterReplace( '/*', '*/', ' ', $value );
+
+ // Remove anything after a comment-start token, to guard against
+ // incorrect client implementations.
+ $commentPos = strpos( $value, '/*' );
+ if ( $commentPos !== false ) {
+ $value = substr( $value, 0, $commentPos );
+ }
}
// Reject problematic keywords and control characters
$decoded = Sanitizer::decodeTagAttributes( $text );
$stripped = Sanitizer::validateTagAttributes( $decoded, $element );
- $attribs = array();
- foreach ( $stripped as $attribute => $value ) {
- $encAttribute = htmlspecialchars( $attribute );
- $encValue = Sanitizer::safeEncodeAttribute( $value );
-
- $attribs[] = "$encAttribute=\"$encValue\"";
- }
- return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
+ return Sanitizer::safeEncodeTagAttributes( $stripped );
}
/**
* in the id and
* name attributes
* @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
- * @see http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#the-id-attribute
+ * @see http://www.whatwg.org/html/elements.html#the-id-attribute
* HTML5 definition of id attribute
*
* @param string $id id to escape
return $attribs;
}
+ /**
+ * Serialize an associative array of attribute names and values (such as
+ * the one returned by decodeTagAttributes) into a partial tag string.
+ *
+ * Every pair is written as name="value": the name is passed through
+ * htmlspecialchars() and the value through Sanitizer::safeEncodeAttribute().
+ * The pairs are joined with single spaces and the result is prefixed with
+ * one space; an empty array produces an empty string.
+ *
+ * @param $assoc_array Array
+ * @return String
+ */
+ public static function safeEncodeTagAttributes( $assoc_array ) {
+ $pairs = array();
+ foreach ( $assoc_array as $name => $rawValue ) {
+ // Encode the name first, then the value, and glue them as name="value".
+ $pairs[] = htmlspecialchars( $name ) . '="' . Sanitizer::safeEncodeAttribute( $rawValue ) . '"';
+ }
+
+ // Nothing to emit: no leading space either.
+ if ( !$pairs ) {
+ return '';
+ }
+ return ' ' . implode( ' ', $pairs );
+ }
+
/**
* Pick the appropriate attribute value from a match set from the
* attribs regex matches.
}
if ( $wgAllowMicrodataAttributes ) {
- # add HTML5 microdata tags as specified by http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata.html#the-microdata-model
+ # add HTML5 microdata tags as specified by http://www.whatwg.org/html/microdata.html#the-microdata-model
$common = array_merge( $common, array(
'itemid', 'itemprop', 'itemref', 'itemscope', 'itemtype'
) );
'hr' => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
# HTML Ruby annotation text module, simple ruby only.
- # http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#the-ruby-element
+ # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
'ruby' => $common,
# rbc
# rtc
'bdi' => $common,
# HTML5 elements, defined by:
- # http://www.whatwg.org/specs/web-apps/current-work/multipage/
+ # http://www.whatwg.org/html/
'data' => array_merge( $common, array( 'value' ) ),
'time' => array_merge( $common, array( 'datetime' ) ),
'mark' => $common,
* Does a string look like an e-mail address?
*
* This validates an email address using an HTML5 specification found at:
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/states-of-the-type-attribute.html#valid-e-mail-address
+ * http://www.whatwg.org/html/states-of-the-type-attribute.html#valid-e-mail-address
* Which as of 2011-01-24 says:
*
* A valid e-mail address is a string that matches the ABNF production