X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=4c996771e89f5bf761ebdebf6e8cf7d54a5c10c2;hb=235e92846eb6fd132f28ec5e05240e3821542ac8;hp=2def06a9dd4d1ce27add861c92695c9c4ae7568e;hpb=a840e8308989ce61886778c70f26d2e088abb363;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 2def06a9dd..4c996771e8 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -56,6 +56,21 @@ class Sanitizer { const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i'; const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/"; + /** + * Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding. + * + * @since 1.30 + */ + const ID_PRIMARY = 0; + + /** + * Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false + * if no fallback is configured. + * + * @since 1.30 + */ + const ID_FALLBACK = 1; + /** * List of all named character entities defined in HTML 4.01 * https://www.w3.org/TR/html4/sgml/entities.html @@ -800,7 +815,7 @@ class Sanitizer { # Escape HTML id attributes if ( $attribute === 'id' ) { - $value = self::escapeId( $value, 'noninitial' ); + $value = self::escapeIdForAttribute( $value, self::ID_PRIMARY ); } # Escape HTML id reference lists @@ -809,7 +824,7 @@ class Sanitizer { || $attribute === 'aria-labelledby' || $attribute === 'aria-owns' ) { - $value = self::escapeIdReferenceList( $value, 'noninitial' ); + $value = self::escapeIdReferenceList( $value ); } // RDFa and microdata properties allow URLs, URIs and/or CURIs. @@ -1164,6 +1179,8 @@ class Sanitizer { * ambiguous if it's part of something that looks like a percent escape * (which don't work reliably in fragments cross-browser). * + * @deprecated since 1.30, use one of this class' escapeIdFor*() functions + * * @see https://www.w3.org/TR/html401/types.html#type-name Valid characters * in the id and name attributes * @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with @@ -1186,8 +1203,6 @@ class Sanitizer { global $wgExperimentalHtmlIds; $options = (array)$options; - $id = self::decodeCharReferences( $id ); - if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) { $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); $id = trim( $id, '_' ); @@ -1215,30 +1230,138 @@ class Sanitizer { return $id; } + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid HTML id attribute. + * + * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe, + * be sure to use proper escaping. + * + * @param string $id String to escape + * @param int $mode One of ID_* constants, specifying whether the primary or fallback encoding + * should be used. + * @return string|bool Escaped ID or false if fallback encoding is requested but it's not + * configured. + * + * @since 1.30 + */ + public static function escapeIdForAttribute( $id, $mode = self::ID_PRIMARY ) { + global $wgFragmentMode; + + if ( !isset( $wgFragmentMode[$mode] ) ) { + if ( $mode === self::ID_PRIMARY ) { + throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' ); + } + return false; + } + + $internalMode = $wgFragmentMode[$mode]; + + return self::escapeIdInternal( $id, $internalMode ); + } + + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid URL fragment. + * + * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe, + * be sure to use proper escaping. + * + * @param string $id String to escape + * @return string Escaped ID + * + * @since 1.30 + */ + public static function escapeIdForLink( $id ) { + global $wgFragmentMode; + + if ( !isset( $wgFragmentMode[self::ID_PRIMARY] ) ) { + throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' ); + } + + $mode = $wgFragmentMode[self::ID_PRIMARY]; + + $id = self::escapeIdInternal( $id, $mode ); + + return $id; + } + + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid URL fragment for external interwikis. + * + * @param string $id String to escape + * @return string Escaped ID + * + * @since 1.30 + */ + public static function escapeIdForExternalInterwiki( $id ) { + global $wgExternalInterwikiFragmentMode; + + $id = self::escapeIdInternal( $id, $wgExternalInterwikiFragmentMode ); + + return $id; + } + + /** + * Helper for escapeIdFor*() functions. Performs most of the actual escaping. + * + * @param string $id String to escape + * @param string $mode One of modes from $wgFragmentMode + * @return string + */ + private static function escapeIdInternal( $id, $mode ) { + switch ( $mode ) { + case 'html5': + $id = str_replace( ' ', '_', $id ); + break; + case 'legacy': + // This corresponds to 'noninitial' mode of the old escapeId() + static $replace = [ + '%3A' => ':', + '%' => '.' + ]; + + $id = urlencode( str_replace( ' ', '_', $id ) ); + $id = strtr( $id, $replace ); + break; + case 'html5-legacy': + $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); + $id = trim( $id, '_' ); + if ( $id === '' ) { + // Must have been all whitespace to start with. + $id = '_'; + } + break; + default: + throw new InvalidArgumentException( "Invalid mode '$mode' passed to '" . __METHOD__ ); + } + + return $id; + } + /** * Given a string containing a space delimited list of ids, escape each id * to match ids escaped by the escapeId() function. * + * @todo remove $options completely in 1.32 + * * @since 1.27 * * @param string $referenceString Space delimited list of ids - * @param string|array $options String or array of strings (default is array()): - * 'noninitial': This is a non-initial fragment of an id, not a full id, - * so don't pay attention if the first character isn't valid at the - * beginning of an id. Only matters if $wgExperimentalHtmlIds is - * false. - * 'legacy': Behave the way the old HTML 4-based ID escaping worked even - * if $wgExperimentalHtmlIds is used, so we can generate extra - * anchors and links won't break. + * @param string|array $options Deprecated and does nothing. * @return string */ static function escapeIdReferenceList( $referenceString, $options = [] ) { + if ( $options ) { + wfDeprecated( __METHOD__ . ' with $options', '1.31' ); + } # Explode the space delimited list string into an array of tokens $references = preg_split( '/\s+/', "{$referenceString}", -1, PREG_SPLIT_NO_EMPTY ); # Escape each token as an id foreach ( $references as &$ref ) { - $ref = self::escapeId( $ref, $options ); + $ref = self::escapeIdForAttribute( $ref ); } # Merge the array back to a space delimited list string @@ -1535,7 +1658,10 @@ class Sanitizer { $text = preg_replace_callback( self::CHAR_REFS_REGEX, [ 'Sanitizer', 'decodeCharReferencesCallback' ], - $text, /* limit */ -1, $count ); + $text, + -1, //limit + $count + ); if ( $count ) { return $wgContLang->normalize( $text ); @@ -1772,7 +1898,7 @@ class Sanitizer { # Not usually allowed, but may be used for extension-style hooks # such as when it is rasterized, or if $wgAllowImageTag is # true - 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ), + 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height', 'srcset' ] ), 'video' => array_merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ), 'source' => array_merge( $common, [ 'type', 'src' ] ), @@ -1809,6 +1935,10 @@ class Sanitizer { # https://www.w3.org/TR/REC-MathML/ 'math' => [ 'class', 'style', 'id', 'title' ], + // HTML 5 section 4.5 + 'figure' => $common, + 'figcaption' => $common, + # HTML 5 section 4.6 'bdi' => $common, @@ -1824,7 +1954,7 @@ class Sanitizer { // (ie: validateTag rejects tags missing the attributes needed for Microdata) // So we don't bother including $common attributes that have no purpose. 'meta' => [ 'itemprop', 'content' ], - 'link' => [ 'itemprop', 'href' ], + 'link' => [ 'itemprop', 'href', 'title' ], ]; return $whitelist;