+
+ /**
+  * Encode an attribute value for HTML tags, with extra armoring
+  * against further wiki processing.
+  *
+  * The value is first encoded with Sanitizer::encodeAttribute(). Characters
+  * and keywords that a later wikitext parsing pass could still act on are
+  * then replaced with character references, and URL protocol prefixes are
+  * passed through Sanitizer::armorLinksCallback().
+  *
+  * @param $text String
+  * @return String: HTML-encoded text fragment
+  */
+ static function safeEncodeAttribute( $text ) {
+     $encValue = Sanitizer::encodeAttribute( $text );
+
+     # Templates and links may be expanded in later parsing,
+     # creating invalid or dangerous output. Suppress this.
+     # Each replacement must differ from its key; an identity mapping
+     # would leave the value unarmored.
+     $encValue = strtr( $encValue, array(
+         '<'    => '&lt;',   // This should never happen,
+         '>'    => '&gt;',   // we've received invalid input
+         '"'    => '&quot;', // which should have been escaped.
+         '{'    => '&#123;',
+         '['    => '&#91;',
+         "''"   => '&#39;&#39;',
+         // Only the first letter of each keyword is replaced; that is
+         // enough to stop the literal keyword from appearing in the output.
+         'ISBN' => '&#73;SBN',
+         'RFC'  => '&#82;FC',
+         'PMID' => '&#80;MID',
+         '|'    => '&#124;',
+         '__'   => '&#95;_',
+     ) );
+
+     # Stupid hack: hand every URL protocol prefix to armorLinksCallback().
+     # NOTE(review): wfUrlProtocols() is assumed to return a regex
+     # alternation already quoted for the '/' delimiter, and the callback
+     # (defined elsewhere) is assumed to break the protocol up so it is
+     # not auto-linked -- confirm against their definitions.
+     $encValue = preg_replace_callback(
+         '/(' . wfUrlProtocols() . ')/',
+         array( 'Sanitizer', 'armorLinksCallback' ),
+         $encValue );
+     return $encValue;
+ }
+
+ /**
+  * Escape a value so that it can be used in an id attribute, and return it.
+  * The value is only escaped, not validated (see the first link).
+  *
+  * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
+  *      in the id and name attributes
+  * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
+  *
+  * @param $id String: id to escape
+  * @param $options Mixed: string or array of strings (default is array()):
+  *   'noninitial': This is a non-initial fragment of an id, not a full id,
+  *       so don't pay attention if the first character isn't valid at the
+  *       beginning of an id.
+  *   'xml': Don't restrict the id to be HTML4-compatible. This option
+  *       allows any alphabetic character to be used, per the XML standard.
+  *       Therefore, it also completely changes the type of escaping: instead
+  *       of weird dot-encoding, runs of invalid characters (mostly
+  *       whitespace) are just compressed into a single underscore.
+  * @return String
+  */
+ static function escapeId( $id, $options = array() ) {
+     $flags = (array)$options;
+     $xmlMode = in_array( 'xml', $flags );
+     $fragmentOnly = in_array( 'noninitial', $flags );
+
+     if ( $xmlMode ) {
+         # XML-style escaping. The character sets follow the XML 1.0 standard,
+         # 5th edition, NameStartChar and NameChar: <http://www.w3.org/TR/REC-xml/>
+         $startSet = ':a-zA-Z_\xC0-\xD6\xD8-\xF6\xF8-\x{2FF}\x{370}-\x{37D}'
+             . '\x{37F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}'
+             . '\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}';
+         $bodySet = $startSet . '.\-0-9\xB7\x{0300}-\x{036F}'
+             . '\x{203F}-\x{2040}';
+
+         # Underscores are matched too, so that a run mixing invalid
+         # characters and underscores collapses to a single underscore.
+         $collapsed = preg_replace( '/([^' . $bodySet . ']|_)+/u', '_', $id );
+         $escaped = trim( $collapsed, '_' );
+
+         $startsValid = preg_match( '/^[' . $startSet . ']/u', $escaped );
+         if ( $startsValid || $fragmentOnly ) {
+             return $escaped;
+         }
+         # A full id must begin with a name-start character.
+         return '_' . $escaped;
+     }
+
+     # HTML4-style escaping: spaces become underscores, character references
+     # are decoded, and the result is URL-encoded with '%' rewritten to '.'
+     # (an encoded colon, '%3A', is restored to ':' first).
+     $underscored = strtr( $id, ' ', '_' );
+     $decoded = Sanitizer::decodeCharReferences( $underscored );
+     $encoded = urlencode( $decoded );
+     $escaped = str_replace( array( '%3A', '%' ), array( ':', '.' ), $encoded );
+
+     if ( !$fragmentOnly && !preg_match( '/^[a-zA-Z]/', $escaped ) ) {
+         // Initial character must be a letter!
+         $escaped = 'x' . $escaped;
+     }
+     return $escaped;
+ }
+
+ /**
+  * Escape a value so that it can be used as a CSS class name, and
+  * return it.
+  *
+  * @todo For extra validity, input should be validated UTF-8.
+  *
+  * @see http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
+  *
+  * @param $class String
+  * @return String
+  */
+ static function escapeClass( $class ) {
+     // Step 1: turn a leading digit or hyphen, any control character, space
+     // or listed ASCII punctuation mark, and the byte pair C2 A0 into an
+     // underscore.
+     $uglyChars = '/(^[0-9\\-])|[\\x00-\\x20!"#$%&\'()*+,.\\/:;<=>?@[\\]^`{|}~]|\\xC2\\xA0/';
+     $escaped = preg_replace( $uglyChars, '_', $class );
+
+     // Step 2: squeeze every run of underscores down to one.
+     $escaped = preg_replace( '/_+/', '_', $escaped );
+
+     // Step 3: drop underscores left dangling at the end.
+     return rtrim( $escaped, '_' );
+ }
+
+ /**
+  * Given HTML input, escape with htmlspecialchars but un-escape entities.
+  * This allows (generally harmless) entities like &#160; to survive.
+  *
+  * @param $html String to escape
+  * @return String: escaped input
+  */
+ static function escapeHtmlAllowEntities( $html ) {
+     # It seems wise to escape ' as well as ", as a matter of course. Can't
+     # hurt.
+     $html = htmlspecialchars( $html, ENT_QUOTES );
+
+     # htmlspecialchars() has turned every '&' into '&amp;', including the
+     # ones that introduced entities. Undo just that step so the entities
+     # read as entities again; '<', '>' and the quotes stay escaped.
+     $html = str_replace( '&amp;', '&', $html );
+
+     # NOTE(review): normalizeCharReferences() is defined elsewhere; it is
+     # assumed to re-escape any '&' that does not start a valid character
+     # reference -- confirm against its definition.
+     $html = Sanitizer::normalizeCharReferences( $html );
+     return $html;
+ }
+