* http://www.gnu.org/copyleft/gpl.html
*
* @package MediaWiki
+ * @subpackage Parser
*/
class Sanitizer {
* Cleans up HTML, removes dangerous tags and attributes, and
* removes HTML comments
* @access private
+ * @param string $text
+ * @return string
*/
function removeHTMLtags( $text ) {
global $wgUseTidy, $wgUserHtml;
* trailing spaces and one of the newlines.
*
* @access private
+ * @param string $text
+ * @return string
*/
function removeHTMLcomments( $text ) {
$fname='Parser::removeHTMLcomments';
if( !isset( $whitelist[$attribute] ) ) {
continue;
}
- if( $set[2] == '' ) {
+ if( !isset( $set[2] ) ) {
# In XHTML, attributes must have a value.
$value = $set[1];
} elseif( $set[3] != '' ) {
array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
$text );
}
-
+ /**
+ * @param array $matches Regex match groups passed in by the callback caller
+ * @return string
+ */
function normalizeCharReferencesCallback( $matches ) {
$ret = null;
if( $matches[1] != '' ) {
* return the named entity reference as is. Otherwise, returns
* HTML-escaped text of pseudo-entity source (eg &foo;)
*
+ * @param string $name
* @return string
*/
function normalizeEntity( $name ) {
);
return $whitelist;
}
+
+ /**
+ * Take a fragment of (potentially invalid) HTML and return
+ * a version with any tags removed, encoded suitably for literal
+ * inclusion in a double-quoted attribute value.
+ *
+ * @param string $text HTML fragment
+ * @return string
+ */
+ function stripAllTags( $text ) {
+ # Actual <tags>
+ $text = preg_replace( '/<[^>]*>/', '', $text );
+
+ # Normalize &entities and whitespace
+ $text = Sanitizer::normalizeAttributeValue( $text );
+
+ # Will be placed into "double-quoted" attributes, so escape the
+ # characters that could still open a tag or close the attribute
+ # (e.g. a stray "<" with no closing ">" is not matched by the
+ # tag regex above and would otherwise survive untouched).
+ $text = str_replace(
+ array('<', '>', '"'),
+ array('&lt;', '&gt;', '&quot;'),
+ $text );
+
+ return $text;
+ }
}
-?>
\ No newline at end of file
+?>