/**
* Acceptable tag name charset from HTML5 parsing spec
- * http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ * https://www.w3.org/TR/html5/syntax.html#tag-open-state
*/
const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
/**
* List of all named character entities defined in HTML 4.01
- * http://www.w3.org/TR/html4/sgml/entities.html
+ * https://www.w3.org/TR/html4/sgml/entities.html
* As well as &apos; which is only defined starting in XHTML1.
*/
private static $htmlEntities = [
/**
* Regular expression to match HTML/XML attribute pairs within a tag.
* Allows some... latitude. Based on,
- * http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
+ * https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
* Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
* @return string
*/
| url\s*\(
| image\s*\(
| image-set\s*\(
+ | attr\s*\([^)]+[\s,]+url
!ix', $value ) ) {
return '/* insecure input */';
}
* ambiguous if it's part of something that looks like a percent escape
* (which don't work reliably in fragments cross-browser).
*
- * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
+ * @see https://www.w3.org/TR/html401/types.html#type-name Valid characters
* in the id and name attributes
- * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
+ * @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
* the id attribute
- * @see http://www.whatwg.org/html/elements.html#the-id-attribute
+ * @see https://www.w3.org/TR/html5/dom.html#the-id-attribute
* HTML5 definition of id attribute
*
* @param string $id Id to escape
*
* @todo For extra validity, input should be validated UTF-8.
*
- * @see http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
+ * @see https://www.w3.org/TR/CSS21/syndata.html Valid characters/format
*
* @param string $class
* @return string
} elseif ( !isset( $set[2] ) ) {
# In XHTML, attributes must have a value so return an empty string.
# See "Empty attribute syntax",
- # http://www.w3.org/TR/html5/syntax.html#syntax-attribute-name
+ # https://www.w3.org/TR/html5/syntax.html#syntax-attribute-name
return "";
} else {
throw new MWException( "Tag conditions not met. This should never happen and is a bug." );
# RDFa
# These attributes are specified in section 9 of
- # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
+ # https://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
'about',
'property',
'resource',
'typeof',
# Microdata. These are specified by
- # http://www.whatwg.org/html/microdata.html#the-microdata-model
+ # https://html.spec.whatwg.org/multipage/microdata.html#the-microdata-model
'itemid',
'itemprop',
'itemref',
];
# Numbers refer to sections in HTML 4.01 standard describing the element.
- # See: http://www.w3.org/TR/html4/
+ # See: https://www.w3.org/TR/html4/
$whitelist = [
# 7.5.4
'div' => $block,
# 9.3.2
'br' => array_merge( $common, [ 'clear' ] ),
- # http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
+ # https://www.w3.org/TR/html5/text-level-semantics.html#the-wbr-element
'wbr' => $common,
# 9.3.4
'hr' => array_merge( $common, [ 'width' ] ),
# HTML Ruby annotation text module, simple ruby only.
- # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
+ # https://www.w3.org/TR/html5/text-level-semantics.html#the-ruby-element
'ruby' => $common,
# rbc
'rb' => $common,
# MathML root element, where used for extensions
# 'title' may not be 100% valid here; it's XHTML
- # http://www.w3.org/TR/REC-MathML/
+ # https://www.w3.org/TR/REC-MathML/
'math' => [ 'class', 'style', 'id', 'title' ],
# HTML 5 section 4.6
'bdi' => $common,
# HTML5 elements, defined by:
- # http://www.whatwg.org/html/
+ # https://html.spec.whatwg.org/multipage/semantics.html#the-data-element
'data' => array_merge( $common, [ 'value' ] ),
'time' => array_merge( $common, [ 'datetime' ] ),
'mark' => $common,
list( /* $whole */, $protocol, $host, $rest ) = $matches;
// Characters that will be ignored in IDNs.
- // http://tools.ietf.org/html/3454#section-3.1
+ // https://tools.ietf.org/html/rfc3454#section-3.1
// Strip them before further processing so blacklists and such work.
$strip = "/
\\s| # general whitespace