+
+ $stripped = Sanitizer::validateTagAttributes(
+ Sanitizer::decodeTagAttributes( $text ), $element );
+
+ $attribs = array();
+ foreach( $stripped as $attribute => $value ) {
+ $encAttribute = htmlspecialchars( $attribute );
+ $encValue = Sanitizer::safeEncodeAttribute( $value );
+
+ $attribs[] = "$encAttribute=\"$encValue\"";
+ }
+ return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
+ }
+
+ /**
+ * Encode an attribute value for HTML output.
+ * @param $text
+ * @return HTML-encoded text fragment
+ */
+ static function encodeAttribute( $text ) {
+ $encValue = htmlspecialchars( $text );
+
+ // Whitespace is normalized during attribute decoding,
+ // so if we've been passed non-spaces we must encode them
+ // ahead of time or they won't be preserved.
+ $encValue = strtr( $encValue, array(
+ "\n" => ' ',
+ "\r" => ' ',
+ "\t" => '	',
+ ) );
+
+ return $encValue;
+ }
+
+ /**
+ * Encode an attribute value for HTML tags, with extra armoring
+ * against further wiki processing.
+ * @param $text
+ * @return HTML-encoded text fragment
+ */
+ static function safeEncodeAttribute( $text ) {
+ $encValue = Sanitizer::encodeAttribute( $text );
+
+ # Templates and links may be expanded in later parsing,
+ # creating invalid or dangerous output. Suppress this.
+ $encValue = strtr( $encValue, array(
+ '<' => '<', // This should never happen,
+ '>' => '>', // we've received invalid input
+ '"' => '"', // which should have been escaped.
+ '{' => '{',
+ '[' => '[',
+ "''" => '''',
+ 'ISBN' => 'ISBN',
+ 'RFC' => 'RFC',
+ 'PMID' => 'PMID',
+ '|' => '|',
+ '__' => '__',
+ ) );
+
+ # Stupid hack
+ $encValue = preg_replace_callback(
+ '/(' . wfUrlProtocols() . ')/',
+ array( 'Sanitizer', 'armorLinksCallback' ),
+ $encValue );
+ return $encValue;
+ }
+
+ /**
+ * Given a value escape it so that it can be used in an id attribute and
+ * return it, this does not validate the value however (see first link)
+ *
+ * @link http://www.w3.org/TR/html401/types.html#type-name Valid characters
+ * in the id and
+ * name attributes
+ * @link http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
+ *
+ * @bug 4461
+ *
+ * @static
+ *
+ * @param string $id
+ * @return string
+ */
+ static function escapeId( $id ) {
+ static $replace = array(
+ '%3A' => ':',
+ '%' => '.'
+ );
+
+ $id = urlencode( Sanitizer::decodeCharReferences( strtr( $id, ' ', '_' ) ) );
+
+ return str_replace( array_keys( $replace ), array_values( $replace ), $id );
+ }
+
+ /**
+ * Given a value, escape it so that it can be used as a CSS class and
+ * return it.
+ *
+ * @todo For extra validity, input should be validated UTF-8.
+ *
+ * @link http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
+ *
+ * @param string $class
+ * @return string
+ */
+ static function escapeClass( $class ) {
+ // Convert ugly stuff to underscores and kill underscores in ugly places
+ return rtrim(preg_replace(
+ array('/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/','/_+/'),
+ '_',
+ $class ), '_');
+ }
+
+ /**
+ * Regex replace callback for armoring links against further processing.
+ * @param array $matches
+ * @return string
+ * @private
+ */
+ private static function armorLinksCallback( $matches ) {
+ return str_replace( ':', ':', $matches[1] );
+ }
+
+ /**
+ * Return an associative array of attribute names and values from
+ * a partial tag string. Attribute names are forces to lowercase,
+ * character references are decoded to UTF-8 text.
+ *
+ * @param string
+ * @return array
+ */
+ static function decodeTagAttributes( $text ) {
+ $attribs = array();
+
+ if( trim( $text ) == '' ) {
+ return $attribs;
+ }
+
+ $pairs = array();