better pattern for detecting evil scripts in rdfa attributes
author Daniel Kinzler <daniel@users.mediawiki.org>
Sat, 7 Nov 2009 15:45:13 +0000 (15:45 +0000)
committer Daniel Kinzler <daniel@users.mediawiki.org>
Sat, 7 Nov 2009 15:45:13 +0000 (15:45 +0000)
includes/Sanitizer.php

index 30eff4b..712dc43 100644 (file)
@@ -56,6 +56,11 @@ define( 'MW_ATTRIBS_REGEX',
                )
           )?(?=$space|\$)/sx" );
 
+/**
+ * Regular expression to match URIs that could trigger script execution
+ */
+define( 'MW_SCRIPT_URL_PATTERN', '/(^|\s)(javascript|vbscript)[^\w]/i' );
+
 /**
  * List of all named character entities defined in HTML 4.01
  * http://www.w3.org/TR/html4/sgml/entities.html
@@ -631,7 +636,7 @@ class Sanitizer {
                                $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' ||
                                $attribute === 'datatype' || $attribute === 'typeof' ) {  
                                //Paranoia. Allow "simple" values but suppress javascript
-                               if ( preg_match( '/(^|\s)javascript\s*:/i', $value ) ) {
+                               if ( preg_match( MW_SCRIPT_URL_PATTERN, $value ) ) {
                                        continue; 
                                }
                        }