/**
* Regular expression to match HTML/XML attribute pairs within a tag.
- * Allows some... latitude. Based on,
- * https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
- * Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
+ * Based on https://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ * Used in Sanitizer::decodeTagAttributes
* @return string
*/
static function getAttribsRegex() {
if ( self::$attribsRegex === null ) {
- $attribFirst = "[:_\p{L}\p{N}]";
- $attrib = "[:_\.\-\p{L}\p{N}]";
- $space = '[\x09\x0a\x0c\x0d\x20]';
+ $spaceChars = '\x09\x0a\x0c\x0d\x20';
+ $space = "[{$spaceChars}]";
+ $attrib = "[^{$spaceChars}\/>=]";
+ $attribFirst = "(?:{$attrib}|=)";
self::$attribsRegex =
- "/(?:^|$space)({$attribFirst}{$attrib}*)
+ "/({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
| '([^']*)(?:'|\$)
| (((?!$space|>).)*)
)
- )?(?=$space|\$)/sxu";
+ )?/sxu";
}
return self::$attribsRegex;
}
+ /**
+ * Lazy-initialised attribute name regex, see getAttribNameRegex()
+ */
+ private static $attribNameRegex;
+
+ /**
+ * Used in Sanitizer::decodeTagAttributes to filter attributes.
+ * @return string
+ */
+ static function getAttribNameRegex() {
+ if ( self::$attribNameRegex === null ) {
+ $attribFirst = "[:_\p{L}\p{N}]";
+ $attrib = "[:_\.\-\p{L}\p{N}]";
+ self::$attribNameRegex = "/^({$attribFirst}{$attrib}*)$/sxu";
+ }
+ return self::$attribNameRegex;
+ }
+
/**
* Return the various lists of recognized tags
* @param array $extratags For any extra tags to include
$badtag = true;
}
}
- } else {
- if ( $t == 'table' ) {
- $tagstack = array_pop( $tablestack );
- }
+ } elseif ( $t == 'table' ) {
+ $tagstack = array_pop( $tablestack );
}
$newparams = '';
} else {
/**
* Given a string containing a space delimited list of ids, escape each id
- * to match ids escaped by the escapeId() function.
- *
- * @todo remove $options completely in 1.32
+ * to match ids escaped by the escapeIdForAttribute() function.
*
* @since 1.27
*
* @param string $referenceString Space delimited list of ids
- * @param string|array $options Deprecated and does nothing.
* @return string
*/
- static function escapeIdReferenceList( $referenceString, $options = [] ) {
- if ( $options ) {
- wfDeprecated( __METHOD__ . ' with $options', '1.31' );
- }
+ public static function escapeIdReferenceList( $referenceString ) {
# Explode the space delimited list string into an array of tokens
$references = preg_split( '/\s+/', "{$referenceString}", -1, PREG_SPLIT_NO_EMPTY );
return [];
}
- $attribs = [];
$pairs = [];
if ( !preg_match_all(
self::getAttribsRegex(),
$text,
$pairs,
PREG_SET_ORDER ) ) {
- return $attribs;
+ return [];
}
+ $attribs = [];
foreach ( $pairs as $set ) {
$attribute = strtolower( $set[1] );
+
+ // Filter attribute names with unacceptable characters
+ if ( !preg_match( self::getAttribNameRegex(), $attribute ) ) {
+ continue;
+ }
+
$value = self::getTagAttributeCallback( $set );
// Normalize whitespace