Merge "Don't fallback from uk to ru"
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 4069658..44e4e3e 100644 (file)
@@ -41,7 +41,7 @@ class Sanitizer {
 
        /**
         * Acceptable tag name charset from HTML5 parsing spec
-        * http://www.w3.org/TR/html5/syntax.html#tag-open-state
+        * https://www.w3.org/TR/html5/syntax.html#tag-open-state
         */
        const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
 
@@ -58,7 +58,7 @@ class Sanitizer {
 
        /**
         * List of all named character entities defined in HTML 4.01
-        * http://www.w3.org/TR/html4/sgml/entities.html
+        * https://www.w3.org/TR/html4/sgml/entities.html
         * As well as &apos; which is only defined starting in XHTML1.
         */
        private static $htmlEntities = [
@@ -333,7 +333,7 @@ class Sanitizer {
        /**
         * Regular expression to match HTML/XML attribute pairs within a tag.
         * Allows some... latitude. Based on,
-        * http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
+        * https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
         * Used in Sanitizer::fixTagAttributes and Sanitizer::decodeTagAttributes
         * @return string
         */
@@ -1149,11 +1149,11 @@ class Sanitizer {
         * ambiguous if it's part of something that looks like a percent escape
         * (which don't work reliably in fragments cross-browser).
         *
-        * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
+        * @see https://www.w3.org/TR/html401/types.html#type-name Valid characters
         *   in the id and name attributes
-        * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
+        * @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
         *   the id attribute
-        * @see http://www.whatwg.org/html/elements.html#the-id-attribute
+        * @see https://www.w3.org/TR/html5/dom.html#the-id-attribute
         *   HTML5 definition of id attribute
         *
         * @param string $id Id to escape
@@ -1239,7 +1239,7 @@ class Sanitizer {
         *
         * @todo For extra validity, input should be validated UTF-8.
         *
-        * @see http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
+        * @see https://www.w3.org/TR/CSS21/syndata.html Valid characters/format
         *
         * @param string $class
         * @return string
@@ -1352,7 +1352,7 @@ class Sanitizer {
                } elseif ( !isset( $set[2] ) ) {
                        # In XHTML, attributes must have a value so return an empty string.
                        # See "Empty attribute syntax",
-                       # http://www.w3.org/TR/html5/syntax.html#syntax-attribute-name
+                       # https://www.w3.org/TR/html5/syntax.html#syntax-attribute-name
                        return "";
                } else {
                        throw new MWException( "Tag conditions not met. This should never happen and is a bug." );
@@ -1622,7 +1622,7 @@ class Sanitizer {
 
                        # RDFa
                        # These attributes are specified in section 9 of
-                       # http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
+                       # https://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
                        'about',
                        'property',
                        'resource',
@@ -1630,7 +1630,7 @@ class Sanitizer {
                        'typeof',
 
                        # Microdata. These are specified by
-                       # http://www.whatwg.org/html/microdata.html#the-microdata-model
+                       # https://html.spec.whatwg.org/multipage/microdata.html#the-microdata-model
                        'itemid',
                        'itemprop',
                        'itemref',
@@ -1654,7 +1654,7 @@ class Sanitizer {
                ];
 
                # Numbers refer to sections in HTML 4.01 standard describing the element.
-               # See: http://www.w3.org/TR/html4/
+               # See: https://www.w3.org/TR/html4/
                $whitelist = [
                        # 7.5.4
                        'div'        => $block,
@@ -1701,7 +1701,7 @@ class Sanitizer {
                        # 9.3.2
                        'br'         => array_merge( $common, [ 'clear' ] ),
 
-                       # http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
+                       # https://www.w3.org/TR/html5/text-level-semantics.html#the-wbr-element
                        'wbr'        => $common,
 
                        # 9.3.4
@@ -1776,7 +1776,7 @@ class Sanitizer {
                        'hr'         => array_merge( $common, [ 'width' ] ),
 
                        # HTML Ruby annotation text module, simple ruby only.
-                       # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element
+                       # https://www.w3.org/TR/html5/text-level-semantics.html#the-ruby-element
                        'ruby'       => $common,
                        # rbc
                        'rb'         => $common,
@@ -1786,14 +1786,14 @@ class Sanitizer {
 
                        # MathML root element, where used for extensions
                        # 'title' may not be 100% valid here; it's XHTML
-                       # http://www.w3.org/TR/REC-MathML/
+                       # https://www.w3.org/TR/REC-MathML/
                        'math'       => [ 'class', 'style', 'id', 'title' ],
 
                        # HTML 5 section 4.6
                        'bdi' => $common,
 
                        # HTML5 elements, defined by:
-                       # http://www.whatwg.org/html/
+                       # https://html.spec.whatwg.org/multipage/semantics.html#the-data-element
                        'data' => array_merge( $common, [ 'value' ] ),
                        'time' => array_merge( $common, [ 'datetime' ] ),
                        'mark' => $common,