X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=ed09701d4bf714181d35663b34da2967f4e18755;hb=77ce3b98a0193b674232e972e7c6993a585412d7;hp=2def06a9dd4d1ce27add861c92695c9c4ae7568e;hpb=0c2d09e312f0f577e5376dd57c5675087bb17244;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 2def06a9dd..ed09701d4b 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -56,6 +56,21 @@ class Sanitizer { const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i'; const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/"; + /** + * Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding. + * + * @since 1.30 + */ + const ID_PRIMARY = 0; + + /** + * Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false + * if no fallback is configured. + * + * @since 1.30 + */ + const ID_FALLBACK = 1; + /** * List of all named character entities defined in HTML 4.01 * https://www.w3.org/TR/html4/sgml/entities.html @@ -800,7 +815,7 @@ class Sanitizer { # Escape HTML id attributes if ( $attribute === 'id' ) { - $value = self::escapeId( $value, 'noninitial' ); + $value = self::escapeIdForAttribute( $value, self::ID_PRIMARY ); } # Escape HTML id reference lists @@ -1164,6 +1179,8 @@ class Sanitizer { * ambiguous if it's part of something that looks like a percent escape * (which don't work reliably in fragments cross-browser). * + * @deprecated since 1.30, use one of this class' escapeIdFor*() functions + * * @see https://www.w3.org/TR/html401/types.html#type-name Valid characters * in the id and name attributes * @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with @@ -1215,21 +1232,146 @@ class Sanitizer { return $id; } + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid HTML id attribute. + * + * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe, + * be sure to use proper escaping. + * + * @param string $id String to escape + * @param int $mode One of ID_* constants, specifying whether the primary or fallback encoding + * should be used. + * @return string|bool Escaped ID or false if fallback encoding is requested but it's not + * configured. + * + * @since 1.30 + */ + public static function escapeIdForAttribute( $id, $mode = self::ID_PRIMARY ) { + global $wgFragmentMode; + + if ( !isset( $wgFragmentMode[$mode] ) ) { + if ( $mode === self::ID_PRIMARY ) { + throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' ); + } + return false; + } + + $internalMode = $wgFragmentMode[$mode]; + + return self::escapeIdInternal( $id, $internalMode ); + } + + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid URL fragment. + * + * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe, + * be sure to use proper escaping. + * + * @param string $id String to escape + * @return string Escaped ID + * + * @since 1.30 + */ + public static function escapeIdForLink( $id ) { + global $wgFragmentMode; + + if ( !isset( $wgFragmentMode[self::ID_PRIMARY] ) ) { + throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' ); + } + + $mode = $wgFragmentMode[self::ID_PRIMARY]; + + $id = self::escapeIdInternal( $id, $mode ); + $id = self::urlEscapeId( $id, $mode ); + + return $id; + } + + /** + * Given a section name or other user-generated or otherwise unsafe string, escapes it to be + * a valid URL fragment for external interwikis. + * + * @param string $id String to escape + * @return string Escaped ID + * + * @since 1.30 + */ + public static function escapeIdForExternalInterwiki( $id ) { + global $wgExternalInterwikiFragmentMode; + + $id = self::escapeIdInternal( $id, $wgExternalInterwikiFragmentMode ); + $id = self::urlEscapeId( $id, $wgExternalInterwikiFragmentMode ); + + return $id; + } + + /** + * Helper for escapeIdFor*() functions. URL-escapes the ID if needed. + * + * @param string $id String to escape + * @param string $mode One of modes from $wgFragmentMode + * @return string + */ + private static function urlEscapeId( $id, $mode ) { + if ( $mode === 'html5' ) { + $id = urlencode( $id ); + $id = str_replace( '%3A', ':', $id ); + } + + return $id; + } + + /** + * Helper for escapeIdFor*() functions. Performs most of the actual escaping. + * + * @param string $id String to escape + * @param string $mode One of modes from $wgFragmentMode + * @return string + */ + private static function escapeIdInternal( $id, $mode ) { + $id = self::decodeCharReferences( $id ); + + switch ( $mode ) { + case 'html5': + $id = str_replace( ' ', '_', $id ); + break; + case 'legacy': + // This corresponds to 'noninitial' mode of the old escapeId() + static $replace = [ + '%3A' => ':', + '%' => '.' + ]; + + $id = urlencode( str_replace( ' ', '_', $id ) ); + $id = strtr( $id, $replace ); + break; + case 'html5-legacy': + $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id ); + $id = trim( $id, '_' ); + if ( $id === '' ) { + // Must have been all whitespace to start with. + $id = '_'; + } + break; + default: + throw new InvalidArgumentException( "Invalid mode '$mode' passed to '" . __METHOD__ ); + } + + return $id; + } + /** * Given a string containing a space delimited list of ids, escape each id * to match ids escaped by the escapeId() function. * + * @todo wfDeprecated() uses of $options in 1.31, remove completely in 1.32 + * * @since 1.27 * * @param string $referenceString Space delimited list of ids - * @param string|array $options String or array of strings (default is array()): - * 'noninitial': This is a non-initial fragment of an id, not a full id, - * so don't pay attention if the first character isn't valid at the - * beginning of an id. Only matters if $wgExperimentalHtmlIds is - * false. - * 'legacy': Behave the way the old HTML 4-based ID escaping worked even - * if $wgExperimentalHtmlIds is used, so we can generate extra - * anchors and links won't break. + * @param string|array $options Deprecated and does nothing. * @return string */ static function escapeIdReferenceList( $referenceString, $options = [] ) { @@ -1238,7 +1380,7 @@ class Sanitizer { # Escape each token as an id foreach ( $references as &$ref ) { - $ref = self::escapeId( $ref, $options ); + $ref = self::escapeIdForAttribute( $ref ); } # Merge the array back to a space delimited list string @@ -1535,7 +1677,10 @@ class Sanitizer { $text = preg_replace_callback( self::CHAR_REFS_REGEX, [ 'Sanitizer', 'decodeCharReferencesCallback' ], - $text, /* limit */ -1, $count ); + $text, + -1, //limit + $count + ); if ( $count ) { return $wgContLang->normalize( $text ); @@ -1772,7 +1917,7 @@ class Sanitizer { # Not usually allowed, but may be used for extension-style hooks # such as when it is rasterized, or if $wgAllowImageTag is # true - 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ), + 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height', 'srcset' ] ), 'video' => array_merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ), 'source' => array_merge( $common, [ 'type', 'src' ] ), @@ -1809,6 +1954,10 @@ class Sanitizer { # https://www.w3.org/TR/REC-MathML/ 'math' => [ 'class', 'style', 'id', 'title' ], + // HTML 5 section 4.5 + 'figure' => $common, + 'figcaption' => $common, + # HTML 5 section 4.6 'bdi' => $common, @@ -1824,7 +1973,7 @@ class Sanitizer { // (ie: validateTag rejects tags missing the attributes needed for Microdata) // So we don't bother including $common attributes that have no purpose. 'meta' => [ 'itemprop', 'content' ], - 'link' => [ 'itemprop', 'href' ], + 'link' => [ 'itemprop', 'href', 'title' ], ]; return $whitelist;