* Allows some... latitude.
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
*/
-$attrib_first = '[:A-Z_a-z]';
+$attribFirst = '[:A-Z_a-z]';
$attrib = '[:A-Z_a-z-.0-9]';
$space = '[\x09\x0a\x0d\x20]';
define( 'MW_ATTRIBS_REGEX',
- "/(?:^|$space)({$attrib_first}{$attrib}*)
+ "/(?:^|$space)({$attribFirst}{$attrib}*)
($space*=$space*
(?:
# The attribute value: quoted or alone
'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
'strike', 'strong', 'tt', 'var', 'div', 'center',
'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
- 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'u', 'abbr', 'dfn'
+ 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
+ 'kbd', 'samp'
);
$htmlsingle = array(
'br', 'hr', 'li', 'dt', 'dd'
*
* To ensure we don't have to bother escaping anything, we also strip ', ",
* & even if $wgExperimentalIds is true. TODO: Is this the best tactic?
- * We also strip # because it upsets IE6.
+ * We also strip # because it upsets IE, and % because it could be
+ * ambiguous if it's part of something that looks like a percent escape
+ * (percent escapes don't work reliably in fragments across browsers).
*
* @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
* in the id and
if ( $wgHtml5 && $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
$id = Sanitizer::decodeCharReferences( $id );
- $id = preg_replace( '/[ \t\n\r\f_\'"&#]+/', '_', $id );
+ $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
$id = trim( $id, '_' );
if ( $id === '' ) {
# Must have been all whitespace to start with.
'cite' => $common,
'dfn' => $common,
'code' => $common,
- # samp
- # kbd
+ 'samp' => $common,
+ 'kbd' => $common,
'var' => $common,
'abbr' => $common,
# acronym