* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
- * @addtogroup Parser
+ * @file
+ * @ingroup Parser
*/
/**
/**
* XHTML sanitizer for MediaWiki
- * @addtogroup Parser
+ * @ingroup Parser
*/
class Sanitizer {
const NONE = 0;
}
/**
- * Merge two sets of HTML attributes.
- * Conflicting items in the second set will override those
- * in the first, except for 'class' attributes which will be
- * combined.
+ * Merge two sets of HTML attributes. Conflicting items in the second set
+ * will override those in the first, except for 'class' attributes which
+ * will be combined (if they're both strings).
*
* @todo implement merging for other attributes such as style
* @param array $a First set of attributes
* @param array $b Second set of attributes; wins over $a on conflicting keys
* @return array The merged attribute set
*/
static function mergeAttributes( $a, $b ) {
$out = array_merge( $a, $b ); # for string keys, values from $b replace those from $a
- if( isset( $a['class'] )
- && isset( $b['class'] )
- && $a['class'] !== $b['class'] ) {
-
- $out['class'] = implode( ' ',
- array_unique(
- preg_split( '/\s+/',
- $a['class'] . ' ' . $b['class'],
- -1,
- PREG_SPLIT_NO_EMPTY ) ) );
+ if( isset( $a['class'] ) && isset( $b['class'] )
+ && is_string( $a['class'] ) && is_string( $b['class'] )
+ && $a['class'] !== $b['class'] ) { # only combine when both sides supply a different string class
+ $classes = preg_split( '/\s+/', "{$a['class']} {$b['class']}",
+ -1, PREG_SPLIT_NO_EMPTY ); # split the joined class lists on whitespace, dropping empty pieces
+ $out['class'] = implode( ' ', array_unique( $classes ) ); # drop repeated names, keeping the first occurrence of each
}
return $out;
}
$class ), '_');
}
+ /**
+ * Given HTML input, escape with htmlspecialchars but un-escape entities.
+ * This allows (generally harmless) entities like &nbsp; to survive.
+ *
+ * @param string $html String to escape
+ * @return string Escaped input
+ */
+ static function escapeHtmlAllowEntities( $html ) {
+ # It seems wise to escape ' as well as ", as a matter of course. Can't
+ # hurt.
+ $html = htmlspecialchars( $html, ENT_QUOTES );
+ # htmlspecialchars() has just turned every '&' into '&amp;'. Undo that
+ # one replacement so entities written in the input are not left
+ # double-escaped; '<', '>' and the quotes stay escaped.
+ $html = str_replace( '&amp;', '&', $html );
+ # NOTE(review): presumably this re-escapes any ampersand that does not
+ # begin a valid character reference -- confirm against
+ # normalizeCharReferences().
+ $html = Sanitizer::normalizeCharReferences( $html );
+ return $html;
+ }
+
/**
* Regex replace callback for armoring links against further processing.
* @param array $matches
* @param string
* @return array
*/
- static function decodeTagAttributes( $text ) {
+ public static function decodeTagAttributes( $text ) {
$attribs = array();
if( trim( $text ) == '' ) {
return $out;
}
- static function cleanUrl( $url, $hostname=true ) {
+ static function cleanUrl( $url ) {
# Normalize any HTML entities in input. They will be
# re-escaped by makeExternalLink().
$url = Sanitizer::decodeCharReferences( $url );