Merge "Fix RequestContextTest screwing up $wgUser"

[lhc/web/wiklou.git] / includes / Sanitizer.php
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php

index 3384af0..6a568c2 100644 (file)
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -3,7 +3,7 @@
   * HTML sanitizer for %MediaWiki.
   *
   * Copyright © 2002-2005 Brion Vibber <brion@pobox.com> et al
- * http://www.mediawiki.org/
+ * https://www.mediawiki.org/
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
@@ -355,12 +355,12 @@ class Sanitizer {
          * Cleans up HTML, removes dangerous tags and attributes, and
          * removes HTML comments
          * @private
-        * @param $text String
-        * @param $processCallback Callback to do any variable or parameter
-        *        replacements in HTML attribute values
-        * @param array $args for the processing callback
-        * @param array $extratags for any extra tags to include
-        * @param array $removetags for any tags (default or extra) to exclude
+        * @param string $text
+        * @param callable $processCallback Callback to do any variable or parameter
+        *   replacements in HTML attribute values
+        * @param array|bool $args Arguments for the processing callback
+        * @param array $extratags For any extra tags to include
+        * @param array $removetags For any tags (default or extra) to exclude
          * @return string
          */
         static function removeHTMLtags( $text, $processCallback = null,
@@ -459,7 +459,10 @@ class Sanitizer {
                                                 $badtag = true;
                                         } elseif ( $slash ) {
                                                 # Closing a tag... is it the one we just opened?
-                                               $ot = @array_pop( $tagstack );
+                                               wfSuppressWarnings();
+                                               $ot = array_pop( $tagstack );
+                                               wfRestoreWarnings();
+
                                                 if ( $ot != $t ) {
                                                         if ( isset( $htmlsingleallowed[$ot] ) ) {
                                                                 # Pop all elements with an optional close tag
@@ -489,7 +492,10 @@ class Sanitizer {
                                                                         }
                                                                 }
                                                         } else {
-                                                               @array_push( $tagstack, $ot );
+                                                               wfSuppressWarnings();
+                                                               array_push( $tagstack, $ot );
+                                                               wfRestoreWarnings();
+
                                                                 # <li> can be nested in <ul> or <ol>, skip those cases:
                                                                 if ( !isset( $htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
                                                                         $badtag = true;
@@ -567,9 +573,16 @@ class Sanitizer {
                 } else {
                         # this might be possible using tidy itself
                         foreach ( $bits as $x ) {
-                               preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
-                               $x, $regs );
-                               @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
+                               preg_match(
+                                       '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/',
+                                       $x,
+                                       $regs
+                               );
+
+                               wfSuppressWarnings();
+                               list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
+                               wfRestoreWarnings();
+
                                 $badtag = false;
                                 if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
                                         if ( is_callable( $processCallback ) ) {
@@ -601,7 +614,7 @@ class Sanitizer {
          * trailing spaces and one of the newlines.
          *
          * @private
-        * @param $text String
+        * @param string $text
          * @return string
          */
         static function removeHTMLcomments( $text ) {
@@ -631,8 +644,7 @@ class Sanitizer {
                                 # Remove the comment, leading and trailing
                                 # spaces, and leave only one newline.
                                 $text = substr_replace( $text, "\n", $spaceStart, $spaceLen + 1 );
-                       }
-                       else {
+                       } else {
                                 # Remove just the comment.
                                 $text = substr_replace( $text, '', $start, $end - $start );
                         }
@@ -649,8 +661,8 @@ class Sanitizer {
          * where we may want to allow a tag within content but ONLY when it has
          * specific attributes set.
          *
-        * @param $params
-        * @param $element
+        * @param string $params
+        * @param string $element
          * @return bool
          */
         static function validateTag( $params, $element ) {
@@ -682,9 +694,9 @@ class Sanitizer {
          * - Unsafe style attributes are discarded
          * - Invalid id attributes are re-encoded
          *
-        * @param $attribs Array
-        * @param $element String
-        * @return Array
+        * @param array $attribs
+        * @param string $element
+        * @return array
          *
          * @todo Check for legal values where the DTD limits things.
          * @todo Check for unique id attribute :P
@@ -702,9 +714,9 @@ class Sanitizer {
          * - Unsafe style attributes are discarded
          * - Invalid id attributes are re-encoded
          *
-        * @param $attribs Array
+        * @param array $attribs
          * @param array $whitelist list of allowed attribute names
-        * @return Array
+        * @return array
          *
          * @todo Check for legal values where the DTD limits things.
          * @todo Check for unique id attribute :P
@@ -801,8 +813,8 @@ class Sanitizer {
          * will be combined (if they're both strings).
          *
          * @todo implement merging for other attributes such as style
-        * @param $a Array
-        * @param $b Array
+        * @param array $a
+        * @param array $b
          * @return array
          */
         static function mergeAttributes( $a, $b ) {
@@ -833,8 +845,8 @@ class Sanitizer {
          * clever input strings. These character references must
          * be escaped before the return value is embedded in HTML.
          *
-        * @param $value String
-        * @return String
+        * @param string $value
+        * @return string
          */
         static function checkCss( $value ) {
                 // Decode character references like &#123;
@@ -867,7 +879,7 @@ class Sanitizer {
  
                 // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
                 $value = preg_replace_callback(
-                       '/[！-ｚ]/u', // U+FF01 to U+FF5A
+                       '/[！-［］-ｚ]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
                         function ( $matches ) {
                                 $cp = utf8ToCodepoint( $matches[0] );
                                 if ( $cp === false ) {
@@ -925,15 +937,25 @@ class Sanitizer {
                 // Reject problematic keywords and control characters
                 if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
                         return '/* invalid control char */';
-               } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', $value ) ) {
+               } elseif ( preg_match(
+                       '! expression
+                               | filter\s*:
+                               | accelerator\s*:
+                               | -o-link\s*:
+                               | -o-link-source\s*:
+                               | -o-replace\s*:
+                               | url\s*\(
+                               | image\s*\(
+                               | image-set\s*\(
+                       !ix', $value ) ) {
                         return '/* insecure input */';
                 }
                 return $value;
         }
  
         /**
-        * @param $matches array
-        * @return String
+        * @param array $matches
+        * @return string
          */
         static function cssDecodeCallback( $matches ) {
                 if ( $matches[1] !== '' ) {
@@ -971,9 +993,9 @@ class Sanitizer {
          * - Unsafe style attributes are discarded
          * - Prepends space if there are attributes.
          *
-        * @param $text String
-        * @param $element String
-        * @return String
+        * @param string $text
+        * @param string $element
+        * @return string
          */
         static function fixTagAttributes( $text, $element ) {
                 if ( trim( $text ) == '' ) {
@@ -988,8 +1010,8 @@ class Sanitizer {
  
         /**
          * Encode an attribute value for HTML output.
-        * @param $text String
-        * @return HTML-encoded text fragment
+        * @param string $text
+        * @return string HTML-encoded text fragment
          */
         static function encodeAttribute( $text ) {
                 $encValue = htmlspecialchars( $text, ENT_QUOTES );
@@ -1009,8 +1031,8 @@ class Sanitizer {
         /**
          * Encode an attribute value for HTML tags, with extra armoring
          * against further wiki processing.
-        * @param $text String
-        * @return HTML-encoded text fragment
+        * @param string $text
+        * @return string HTML-encoded text fragment
          */
         static function safeEncodeAttribute( $text ) {
                 $encValue = Sanitizer::encodeAttribute( $text );
@@ -1053,14 +1075,14 @@ class Sanitizer {
          * (which don't work reliably in fragments cross-browser).
          *
          * @see http://www.w3.org/TR/html401/types.html#type-name Valid characters
-        *                                                          in the id and
-        *                                                          name attributes
-        * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with the id attribute
+        *   in the id and name attributes
+        * @see http://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
+        *   the id attribute
          * @see http://www.whatwg.org/html/elements.html#the-id-attribute
          *   HTML5 definition of id attribute
          *
          * @param string $id id to escape
-        * @param $options Mixed: string or array of strings (default is array()):
+        * @param string|array $options String or array of strings (default is array()):
          *   'noninitial': This is a non-initial fragment of an id, not a full id,
          *       so don't pay attention if the first character isn't valid at the
          *       beginning of an id.  Only matters if $wgExperimentalHtmlIds is
@@ -1068,7 +1090,7 @@ class Sanitizer {
          *   'legacy': Behave the way the old HTML 4-based ID escaping worked even
          *       if $wgExperimentalHtmlIds is used, so we can generate extra
          *       anchors and links won't break.
-        * @return String
+        * @return string
          */
         static function escapeId( $id, $options = array() ) {
                 global $wgExperimentalHtmlIds;
@@ -1111,8 +1133,8 @@ class Sanitizer {
          *
          * @see http://www.w3.org/TR/CSS21/syndata.html Valid characters/format
          *
-        * @param $class String
-        * @return String
+        * @param string $class
+        * @return string
          */
         static function escapeClass( $class ) {
                 // Convert ugly stuff to underscores and kill underscores in ugly places
@@ -1126,8 +1148,8 @@ class Sanitizer {
          * Given HTML input, escape with htmlspecialchars but un-escape entities.
          * This allows (generally harmless) entities like &#160; to survive.
          *
-        * @param string $html to escape
-        * @return String: escaped input
+        * @param string $html HTML to escape
+        * @return string Escaped input
          */
         static function escapeHtmlAllowEntities( $html ) {
                 $html = Sanitizer::decodeCharReferences( $html );
@@ -1139,7 +1161,7 @@ class Sanitizer {
  
         /**
          * Regex replace callback for armoring links against further processing.
-        * @param $matches Array
+        * @param array $matches
          * @return string
          */
         private static function armorLinksCallback( $matches ) {
@@ -1151,8 +1173,8 @@ class Sanitizer {
          * a partial tag string. Attribute names are forces to lowercase,
          * character references are decoded to UTF-8 text.
          *
-        * @param $text String
-        * @return Array
+        * @param string $text
+        * @return array
          */
         public static function decodeTagAttributes( $text ) {
                 if ( trim( $text ) == '' ) {
@@ -1187,8 +1209,8 @@ class Sanitizer {
          * Build a partial tag string from an associative array of attribute
          * names and values as returned by decodeTagAttributes.
          *
-        * @param $assoc_array Array
-        * @return String
+        * @param array $assoc_array
+        * @return string
          */
         public static function safeEncodeTagAttributes( $assoc_array ) {
                 $attribs = array();
@@ -1205,9 +1227,9 @@ class Sanitizer {
          * Pick the appropriate attribute value from a match set from the
          * attribs regex matches.
          *
-        * @param $set Array
-        * @throws MWException
-        * @return String
+        * @param array $set
+        * @throws MWException when tag conditions are not met.
+        * @return string
          */
         private static function getTagAttributeCallback( $set ) {
                 if ( isset( $set[6] ) ) {
@@ -1239,8 +1261,9 @@ class Sanitizer {
          * but note that we're not returning the value, but are returning
          * XML source fragments that will be slapped into output.
          *
-        * @param $text String
-        * @return String
+        * @param string $text
+        * @return string
+        * @todo Remove, unused?
          */
         private static function normalizeAttributeValue( $text ) {
                 return str_replace( '"', '&quot;',
@@ -1249,8 +1272,8 @@ class Sanitizer {
         }
  
         /**
-        * @param $text string
-        * @return mixed
+        * @param string $text
+        * @return string
          */
         private static function normalizeWhitespace( $text ) {
                 return preg_replace(
@@ -1264,8 +1287,8 @@ class Sanitizer {
          * by Parser::stripSectionName(), for use in the id's that are used for
          * section links.
          *
-        * @param $section String
-        * @return String
+        * @param string $section
+        * @return string
          */
         static function normalizeSectionNameWhitespace( $section ) {
                 return trim( preg_replace( '/[ _]+/', ' ', $section ) );
@@ -1282,8 +1305,8 @@ class Sanitizer {
          * c. use lower cased "&#x", not "&#X"
          * d. fix or reject non-valid attributes
          *
-        * @param $text String
-        * @return String
+        * @param string $text
+        * @return string
          * @private
          */
         static function normalizeCharReferences( $text ) {
@@ -1292,9 +1315,10 @@ class Sanitizer {
                         array( 'Sanitizer', 'normalizeCharReferencesCallback' ),
                         $text );
         }
+
         /**
-        * @param $matches String
-        * @return String
+        * @param string $matches
+        * @return string
          */
         static function normalizeCharReferencesCallback( $matches ) {
                 $ret = null;
@@ -1319,8 +1343,8 @@ class Sanitizer {
          * the HTML equivalent. Otherwise, returns HTML-escaped text of
          * pseudo-entity source (eg &amp;foo;)
          *
-        * @param $name String
-        * @return String
+        * @param string $name
+        * @return string
          */
         static function normalizeEntity( $name ) {
                 if ( isset( self::$htmlEntityAliases[$name] ) ) {
@@ -1336,7 +1360,7 @@ class Sanitizer {
         }
  
         /**
-        * @param $codepoint
+        * @param int $codepoint
          * @return null|string
          */
         static function decCharReference( $codepoint ) {
@@ -1349,7 +1373,7 @@ class Sanitizer {
         }
  
         /**
-        * @param $codepoint
+        * @param int $codepoint
          * @return null|string
          */
         static function hexCharReference( $codepoint ) {
@@ -1363,8 +1387,8 @@ class Sanitizer {
  
         /**
          * Returns true if a given Unicode codepoint is a valid character in XML.
-        * @param $codepoint Integer
-        * @return Boolean
+        * @param int $codepoint
+        * @return bool
          */
         private static function validateCodepoint( $codepoint ) {
                 return $codepoint == 0x09
@@ -1379,8 +1403,8 @@ class Sanitizer {
          * Decode any character references, numeric or named entities,
          * in the text and return a UTF-8 string.
          *
-        * @param $text String
-        * @return String
+        * @param string $text
+        * @return string
          */
         public static function decodeCharReferences( $text ) {
                 return preg_replace_callback(
@@ -1396,8 +1420,8 @@ class Sanitizer {
          * This is useful for page titles, not for text to be displayed,
          * MediaWiki allows HTML entities to escape normalization as a feature.
          *
-        * @param string $text (already normalized, containing entities)
-        * @return String (still normalized, without entities)
+        * @param string $text Already normalized, containing entities
+        * @return string Still normalized, without entities
          */
         public static function decodeCharReferencesAndNormalize( $text ) {
                 global $wgContLang;
@@ -1414,8 +1438,8 @@ class Sanitizer {
         }
  
         /**
-        * @param $matches String
-        * @return String
+        * @param string $matches
+        * @return string
          */
         static function decodeCharReferencesCallback( $matches ) {
                 if ( $matches[1] != '' ) {
@@ -1432,8 +1456,8 @@ class Sanitizer {
         /**
          * Return UTF-8 string for a codepoint if that is a valid
          * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
-        * @param $codepoint Integer
-        * @return String
+        * @param int $codepoint
+        * @return string
          * @private
          */
         static function decodeChar( $codepoint ) {
@@ -1449,8 +1473,8 @@ class Sanitizer {
          * return the UTF-8 encoding of that character. Otherwise, returns
          * pseudo-entity source (eg "&foo;")
          *
-        * @param $name String
-        * @return String
+        * @param string $name
+        * @return string
          */
         static function decodeEntity( $name ) {
                 if ( isset( self::$htmlEntityAliases[$name] ) ) {
@@ -1466,8 +1490,8 @@ class Sanitizer {
         /**
          * Fetch the whitelist of acceptable attributes for a given element name.
          *
-        * @param $element String
-        * @return Array
+        * @param string $element
+        * @return array
          */
         static function attributeWhitelist( $element ) {
                 $list = Sanitizer::setupAttributeWhitelist();
@@ -1479,15 +1503,15 @@ class Sanitizer {
         /**
          * Foreach array key (an allowed HTML element), return an array
          * of allowed attributes
-        * @return Array
+        * @return array
          */
         static function setupAttributeWhitelist() {
                 global $wgAllowRdfaAttributes, $wgAllowMicrodataAttributes;
-
                 static $whitelist, $staticInitialised;
+
                 $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgAllowMicrodataAttributes' ) );
  
-               if ( isset( $whitelist ) && $staticInitialised == $globalContext ) {
+               if ( $whitelist !== null && $staticInitialised == $globalContext ) {
                         return $whitelist;
                 }
  
@@ -1702,7 +1726,7 @@ class Sanitizer {
          * inclusion in HTML output as of 1.10!
          *
          * @param string $text HTML fragment
-        * @return String
+        * @return string
          */
         static function stripAllTags( $text ) {
                 # Actual <tags>
@@ -1722,7 +1746,7 @@ class Sanitizer {
          *
          * Use for passing XHTML fragments to PHP's XML parsing functions
          *
-        * @return String
+        * @return string
          */
         static function hackDocType() {
                 $out = "<!DOCTYPE html [\n";
@@ -1734,7 +1758,7 @@ class Sanitizer {
         }
  
         /**
-        * @param $url string
+        * @param string $url
          * @return mixed|string
          */
         static function cleanUrl( $url ) {
@@ -1781,7 +1805,7 @@ class Sanitizer {
         }
  
         /**
-        * @param $matches array
+        * @param array $matches
          * @return string
          */
         static function cleanUrlCallback( $matches ) {
@@ -1814,7 +1838,7 @@ class Sanitizer {
          * @since 1.18
          *
          * @param string $addr E-mail address
-        * @return Bool
+        * @return bool
          */
         public static function validateEmail( $addr ) {
                 $result = null;
@@ -1828,7 +1852,7 @@ class Sanitizer {
                 $rfc5322_atext = "a-z0-9!#$%&'*+\\-\/=?^_`{|}~";
                 $rfc1034_ldh_str = "a-z0-9\\-";
  
-               $HTML5_email_regexp = "/
+               $html5_email_regexp = "/
                 ^                      # start of string
                 [$rfc5322_atext\\.]+    # user part which is liberal :p
                 @                      # 'apostrophe'
@@ -1837,6 +1861,6 @@ class Sanitizer {
                 $                      # End of string
                 /ix"; // case Insensitive, eXtended
  
-               return (bool)preg_match( $HTML5_email_regexp, $addr );
+               return (bool)preg_match( $html5_email_regexp, $addr );
         }
  }