const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i';
const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/";
+ /**
+ * Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
+ *
+ * @since 1.30
+ */
+ const ID_PRIMARY = 0;
+
+ /**
+ * Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false
+ * if no fallback is configured.
+ *
+ * @since 1.30
+ */
+ const ID_FALLBACK = 1;
+
/**
* List of all named character entities defined in HTML 4.01
* https://www.w3.org/TR/html4/sgml/entities.html
# Escape HTML id attributes
if ( $attribute === 'id' ) {
- $value = self::escapeId( $value, 'noninitial' );
+ $value = self::escapeIdForAttribute( $value, self::ID_PRIMARY );
}
# Escape HTML id reference lists
* ambiguous if it's part of something that looks like a percent escape
* (which don't work reliably in fragments cross-browser).
*
+ * @deprecated since 1.30, use one of this class' escapeIdFor*() functions
+ *
* @see https://www.w3.org/TR/html401/types.html#type-name Valid characters
* in the id and name attributes
* @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
return $id;
}
+ /**
+ * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+ * a valid HTML id attribute.
+ *
+ * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+ * be sure to use proper escaping.
+ *
+ * @param string $id String to escape
+ * @param int $mode One of ID_* constants, specifying whether the primary or fallback encoding
+ * should be used.
+ * @return string|bool Escaped ID or false if fallback encoding is requested but it's not
+ * configured.
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForAttribute( $id, $mode = self::ID_PRIMARY ) {
+ global $wgFragmentMode;
+
+ if ( !isset( $wgFragmentMode[$mode] ) ) {
+ if ( $mode === self::ID_PRIMARY ) {
+ throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+ }
+ return false;
+ }
+
+ $internalMode = $wgFragmentMode[$mode];
+
+ return self::escapeIdInternal( $id, $internalMode );
+ }
+
+ /**
+ * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+ * a valid URL fragment.
+ *
+ * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+ * be sure to use proper escaping.
+ *
+ * @param string $id String to escape
+ * @return string Escaped ID
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForLink( $id ) {
+ global $wgFragmentMode;
+
+ if ( !isset( $wgFragmentMode[self::ID_PRIMARY] ) ) {
+ throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+ }
+
+ $mode = $wgFragmentMode[self::ID_PRIMARY];
+
+ $id = self::escapeIdInternal( $id, $mode );
+ $id = self::urlEscapeId( $id, $mode );
+
+ return $id;
+ }
+
+ /**
+ * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+ * a valid URL fragment for external interwikis.
+ *
+ * @param string $id String to escape
+ * @return string Escaped ID
+ *
+ * @since 1.30
+ */
+ public static function escapeIdForExternalInterwiki( $id ) {
+ global $wgExternalInterwikiFragmentMode;
+
+ $id = self::escapeIdInternal( $id, $wgExternalInterwikiFragmentMode );
+ $id = self::urlEscapeId( $id, $wgExternalInterwikiFragmentMode );
+
+ return $id;
+ }
+
+ /**
+ * Helper for escapeIdFor*() functions. URL-escapes the ID if needed.
+ *
+ * @param string $id String to escape
+ * @param string $mode One of modes from $wgFragmentMode
+ * @return string
+ */
+ private static function urlEscapeId( $id, $mode ) {
+ if ( $mode === 'html5' ) {
+ $id = urlencode( $id );
+ $id = str_replace( '%3A', ':', $id );
+ }
+
+ return $id;
+ }
+
+ /**
+ * Helper for escapeIdFor*() functions. Performs most of the actual escaping.
+ *
+ * @param string $id String to escape
+ * @param string $mode One of modes from $wgFragmentMode
+ * @return string
+ */
+ private static function escapeIdInternal( $id, $mode ) {
+ $id = self::decodeCharReferences( $id );
+
+ switch ( $mode ) {
+ case 'html5':
+ $id = str_replace( ' ', '_', $id );
+ break;
+ case 'legacy':
+ // This corresponds to 'noninitial' mode of the old escapeId()
+ static $replace = [
+ '%3A' => ':',
+ '%' => '.'
+ ];
+
+ $id = urlencode( str_replace( ' ', '_', $id ) );
+ $id = strtr( $id, $replace );
+ break;
+ case 'html5-legacy':
+ $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
+ $id = trim( $id, '_' );
+ if ( $id === '' ) {
+ // Must have been all whitespace to start with.
+ $id = '_';
+ }
+ break;
+ default:
+ throw new InvalidArgumentException( "Invalid mode '$mode' passed to '" . __METHOD__ );
+ }
+
+ return $id;
+ }
+
/**
* Given a string containing a space delimited list of ids, escape each id
* to match ids escaped by the escapeId() function.
*
+ * @todo wfDeprecated() uses of $options in 1.31, remove completely in 1.32
+ *
* @since 1.27
*
* @param string $referenceString Space delimited list of ids
- * @param string|array $options String or array of strings (default is array()):
- * 'noninitial': This is a non-initial fragment of an id, not a full id,
- * so don't pay attention if the first character isn't valid at the
- * beginning of an id. Only matters if $wgExperimentalHtmlIds is
- * false.
- * 'legacy': Behave the way the old HTML 4-based ID escaping worked even
- * if $wgExperimentalHtmlIds is used, so we can generate extra
- * anchors and links won't break.
+ * @param string|array $options Deprecated and does nothing.
* @return string
*/
static function escapeIdReferenceList( $referenceString, $options = [] ) {
# Escape each token as an id
foreach ( $references as &$ref ) {
- $ref = self::escapeId( $ref, $options );
+ $ref = self::escapeIdForAttribute( $ref );
}
# Merge the array back to a space delimited list string
$text = preg_replace_callback(
self::CHAR_REFS_REGEX,
[ 'Sanitizer', 'decodeCharReferencesCallback' ],
- $text, /* limit */ -1, $count );
+ $text,
+ -1, //limit
+ $count
+ );
if ( $count ) {
return $wgContLang->normalize( $text );
# Not usually allowed, but may be used for extension-style hooks
# such as <math> when it is rasterized, or if $wgAllowImageTag is
# true
- 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height' ] ),
+ 'img' => array_merge( $common, [ 'alt', 'src', 'width', 'height', 'srcset' ] ),
'video' => array_merge( $common, [ 'poster', 'controls', 'preload', 'width', 'height' ] ),
'source' => array_merge( $common, [ 'type', 'src' ] ),
# https://www.w3.org/TR/REC-MathML/
'math' => [ 'class', 'style', 'id', 'title' ],
+ // HTML 5 section 4.5
+ 'figure' => $common,
+ 'figcaption' => $common,
+
# HTML 5 section 4.6
'bdi' => $common,
// (ie: validateTag rejects tags missing the attributes needed for Microdata)
// So we don't bother including $common attributes that have no purpose.
'meta' => [ 'itemprop', 'content' ],
- 'link' => [ 'itemprop', 'href' ],
+ 'link' => [ 'itemprop', 'href', 'title' ],
];
return $whitelist;