Merge "skin: Revert font-weight on successbox and move to preferences"
[lhc/web/wiklou.git] / includes / upload / UploadBase.php
index 6a31582..774650b 100644 (file)
@@ -46,6 +46,8 @@ abstract class UploadBase {
        protected $mBlackListedExtensions;
        protected $mJavaDetected;
 
+       protected static $safeXmlEncodings = array( 'UTF-8', 'ISO-8859-1', 'ISO-8859-2', 'UTF-16', 'UTF-32' );
+
        const SUCCESS = 0;
        const OK = 0;
        const EMPTY_FILE = 3;
@@ -980,6 +982,15 @@ abstract class UploadBase {
                        return true;
                }
 
+               // Some browsers will interpret obscure xml encodings as UTF-8, while
+               // PHP/expat will interpret the given encoding in the xml declaration (bug 47304)
+               if ( $extension == 'svg' || strpos( $mime, 'image/svg' ) === 0 ) {
+                       if ( self::checkXMLEncodingMissmatch( $file ) ) {
+                               wfProfileOut( __METHOD__ );
+                               return true;
+                       }
+               }
+
                /**
                 * Internet Explorer for Windows performs some really stupid file type
                 * autodetection which can cause it to interpret valid image files as HTML
@@ -1051,6 +1062,62 @@ abstract class UploadBase {
                return false;
        }
 
+
+       /**
+        * Check a whitelist of xml encodings that are known not to be interpreted differently
+        * by the server's xml parser (expat) and some common browsers.
+        *
+        * @param string $file pathname to the temporary upload file
+        * @return Boolean: true if the file contains an encoding that could be misinterpreted
+        */
+       public static function checkXMLEncodingMissmatch( $file ) {
+               global $wgSVGMetadataCutoff;
+               $contents = file_get_contents( $file, false, null, -1, $wgSVGMetadataCutoff );
+               $encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
+
+               if ( preg_match( "!<\?xml\b(.*?)\?>!si", $contents, $matches ) ) {
+                       if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+                               && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+                       ) {
+                               wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+                               return true;
+                       }
+               } elseif ( preg_match( "!<\?xml\b!si", $contents ) ) {
+                       // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+                       // bytes. There shouldn't be a legitimate reason for this to happen.
+                       wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+                       return true;
+               } elseif ( substr( $contents, 0, 4) == "\x4C\x6F\xA7\x94" ) {
+                       // EBCDIC encoded XML
+                       wfDebug( __METHOD__ . ": EBCDIC Encoded XML\n" );
+                       return true;
+               }
+
+               // It's possible the file is encoded with multi-byte encoding, so re-encode attempt to
+               // detect the encoding in case is specifies an encoding not whitelisted in self::$safeXmlEncodings
+               $attemptEncodings = array( 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' );
+               foreach ( $attemptEncodings as $encoding ) {
+                       wfSuppressWarnings();
+                       $str = iconv( $encoding, 'UTF-8', $contents );
+                       wfRestoreWarnings();
+                       if ( $str != '' && preg_match( "!<\?xml\b(.*?)\?>!si", $str, $matches ) ) {
+                               if ( preg_match( $encodingRegex, $matches[1], $encMatch )
+                                       && !in_array( strtoupper( $encMatch[1] ), self::$safeXmlEncodings )
+                               ) {
+                                       wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'\n" );
+                                       return true;
+                               }
+                       } elseif ( $str != '' && preg_match( "!<\?xml\b!si", $str ) ) {
+                               // Start of XML declaration without an end in the first $wgSVGMetadataCutoff
+                               // bytes. There shouldn't be a legitimate reason for this to happen.
+                               wfDebug( __METHOD__ . ": Unmatched XML declaration start\n" );
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
        /**
         * @param $filename string
         * @return bool