From 52f05fd8fa14bcd6c5876857263b846067b743c3 Mon Sep 17 00:00:00 2001 From: Daniel Kinzler Date: Sat, 7 Nov 2009 16:46:34 +0000 Subject: [PATCH] adding support for <a> tags as a parser tag hook, in order to support rdfa output --- includes/Linker.php | 5 ++++- includes/Sanitizer.php | 14 ++++++++++++++ includes/parser/Parser.php | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/includes/Linker.php b/includes/Linker.php index f6b3d688de..b967a87379 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -760,7 +760,10 @@ class Linker { * hook play with them, *then* expand it all at once. */ function makeExternalLink( $url, $text, $escape = true, $linktype = '', $attribs = array() ) { - $attribsText = $this->getExternalLinkAttributes( 'external ' . $linktype ); + if ( isset( $attribs[ 'class' ] ) ) $class = $attribs[ 'class' ]; # yet another hack :( + else $class = 'external ' . $linktype; + + $attribsText = $this->getExternalLinkAttributes( $class ); $url = htmlspecialchars( $url ); if( $escape ) { $text = htmlspecialchars( $text ); diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index 712dc43991..1af5cf7181 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -610,6 +610,8 @@ class Sanitizer { */ static function validateAttributes( $attribs, $whitelist ) { $whitelist = array_flip( $whitelist ); + $hrefExp = '/^(' . wfUrlProtocols() . ')[^\s]+$/'; + $out = array(); foreach( $attribs as $attribute => $value ) { if( !isset( $whitelist[$attribute] ) ) { @@ -641,6 +643,15 @@ class Sanitizer { } } + # NOTE: even though elements using href/src are not allowed directly, supply + # validation code that can be used by tag hook handlers, etc + if ( $attribute === 'href' || $attribute === 'src' ) { + if ( !preg_match( $hrefExp, $value ) ) { + continue; //drop any href or src attributes not using an allowed protocol. 
+ //NOTE: this also drops all relative URLs + } + } + // If this attribute was previously set, override it. // Output should only have one attribute of each name. $out[$attribute] = $value; @@ -1279,6 +1290,9 @@ class Sanitizer { 'td' => array_merge( $common, $tablecell, $tablealign ), 'th' => array_merge( $common, $tablecell, $tablealign ), + # 12.2 # NOTE: <a> is not allowed directly, but the attrib whitelist is used from the Parser object + 'a' => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa + # 13.2 # Not usually allowed, but may be used for extension-style hooks # such as <math> when it is rasterized diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index ae9b8322f3..725c284e58 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -129,7 +129,7 @@ class Parser $this->mFunctionHooks = array(); $this->mFunctionTagHooks = array(); $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); - $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' ); + $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery', 'a' ); $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; @@ -3284,6 +3284,9 @@ class Parser case 'gallery': $output = $this->renderImageGallery( $content, $attributes ); break; + case 'a': + $output = $this->renderHyperlink( $content, $attributes, $frame ); + break; case 'math': if ( $this->mOptions->getUseTeX() ) { $output = $wgContLang->armourMath( @@ -4332,6 +4335,35 @@ class Parser ''; } + /** + * Tag hook handler for 'a'. Renders a HTML <a> tag, allowing most attributes, filtering href against + * allowed protocols and spam blacklist. 
+ **/ + function renderHyperlink( $text, $params, $frame = false ) { + foreach ( $params as $name => $value ) { + $params[ $name ] = $this->replaceVariables( $value, $frame ); + } + + $whitelist = Sanitizer::attributeWhitelist( 'a' ); + $params = Sanitizer::validateAttributes( $params, $whitelist ); + + $content = $this->recursiveTagParse( trim( $text ), $frame ); + + if ( isset( $params[ 'href' ] ) ) { + $href = $params[ 'href' ]; + $this->mOutput->addExternalLink( $href ); + unset( $params[ 'href' ] ); + } else { + # Non-link tag + return Xml::openElement( 'a', $params ) . $content . Xml::closeElement( 'a' ); + } + + $sk = $this->mOptions->getSkin(); + $html = $sk->makeExternalLink( $href, $content, false, '', $params ); + + return $html; + } + /** * Renders an image gallery from a text with one line per image. * text labels may be given by using |-style alternative text. E.g. -- 2.20.1