Merge "Add support for PHP7 random_bytes in favor of mcrypt_create_iv"
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 3ec0596..8db1fe3 100644 (file)
@@ -89,13 +89,15 @@ class Parser {
        # Everything except bracket, space, or control characters
        # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
        # as well as U+3000 is IDEOGRAPHIC SPACE for T21052
-       const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
+       # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
+       # uses to replace invalid HTML characters.
+       const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
        # Simplified expression to match an IPv4 or IPv6 address, or
        # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
-       const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
+       const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
        # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
        // @codingStandardsIgnoreStart Generic.Files.LineLength
-       const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
+       const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
                \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
        // @codingStandardsIgnoreEnd
 
@@ -264,7 +266,7 @@ class Parser {
                $this->mUrlProtocols = wfUrlProtocols();
                $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
                        self::EXT_LINK_ADDR .
-                       self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
+                       self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
                if ( isset( $conf['preprocessorClass'] ) ) {
                        $this->mPreprocessorClass = $conf['preprocessorClass'];
                } elseif ( defined( 'HPHP_VERSION' ) ) {
@@ -330,7 +332,9 @@ class Parser {
                CoreTagHooks::register( $this );
                $this->initialiseVariables();
 
-               Hooks::run( 'ParserFirstCallInit', [ &$this ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
        }
 
        /**
@@ -381,7 +385,9 @@ class Parser {
 
                $this->mProfiler = new SectionProfiler();
 
-               Hooks::run( 'ParserClearState', [ &$this ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'ParserClearState', [ &$parser ] );
        }
 
        /**
@@ -413,6 +419,8 @@ class Parser {
                        $text = strtr( $text, "\x7f", "?" );
                        $magicScopeVariable = $this->lock();
                }
+               // Strip U+0000 NULL (T159174)
+               $text = str_replace( "\000", '', $text );
 
                $this->startParse( $title, $options, self::OT_HTML, $clearState );
 
@@ -435,11 +443,13 @@ class Parser {
                        $this->mRevisionSize = null;
                }
 
-               Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
                # No more strip!
-               Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
+               Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
                $text = $this->internalParse( $text );
-               Hooks::run( 'ParserAfterParse', [ &$this, &$text, &$this->mStripState ] );
+               Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
 
                $text = $this->internalParseHalfParsed( $text, true, $linestart );
 
@@ -615,8 +625,10 @@ class Parser {
         * @return string UNSAFE half-parsed HTML
         */
        public function recursiveTagParse( $text, $frame = false ) {
-               Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
-               Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
+               Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
                $text = $this->internalParse( $text, false, $frame );
                return $text;
        }
@@ -663,8 +675,10 @@ class Parser {
                if ( $revid !== null ) {
                        $this->mRevisionId = $revid;
                }
-               Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] );
-               Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
+               Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
                $text = $this->replaceVariables( $text, $frame );
                $text = $this->mStripState->unstripBoth( $text );
                return $text;
@@ -1259,8 +1273,11 @@ class Parser {
 
                $origText = $text;
 
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+
                # Hook to suspend the parser in this state
-               if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$this, &$text, &$this->mStripState ] ) ) {
+               if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
                        return $text;
                }
 
@@ -1280,16 +1297,16 @@ class Parser {
                        $text = $this->replaceVariables( $text );
                }
 
-               Hooks::run( 'InternalParseBeforeSanitize', [ &$this, &$text, &$this->mStripState ] );
+               Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
                $text = Sanitizer::removeHTMLtags(
                        $text,
-                       [ &$this, 'attributeStripCallback' ],
+                       [ $this, 'attributeStripCallback' ],
                        false,
                        array_keys( $this->mTransparentTagHooks ),
                        [],
-                       [ &$this, 'addTrackingCategory' ]
+                       [ $this, 'addTrackingCategory' ]
                );
-               Hooks::run( 'InternalParseBeforeLinks', [ &$this, &$text, &$this->mStripState ] );
+               Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );
 
                # Tables need to come after variable replacement for things to work
                # properly; putting them before other transformations should keep
@@ -1328,8 +1345,11 @@ class Parser {
        private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
                $text = $this->mStripState->unstripGeneral( $text );
 
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+
                if ( $isMain ) {
-                       Hooks::run( 'ParserAfterUnstrip', [ &$this, &$text ] );
+                       Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
                }
 
                # Clean up special characters, only run once, next-to-last before doBlockLevels
@@ -1368,7 +1388,7 @@ class Parser {
                $text = $this->mStripState->unstripNoWiki( $text );
 
                if ( $isMain ) {
-                       Hooks::run( 'ParserBeforeTidy', [ &$this, &$text ] );
+                       Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
                }
 
                $text = $this->replaceTransparentTags( $text );
@@ -1409,7 +1429,7 @@ class Parser {
                }
 
                if ( $isMain ) {
-                       Hooks::run( 'ParserAfterTidy', [ &$this, &$text ] );
+                       Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
                }
 
                return $text;
@@ -1434,20 +1454,21 @@ class Parser {
                $spdash = "(?:-|$space)"; # a dash or a non-newline space
                $spaces = "$space++"; # possessive match of 1 or more spaces
                $text = preg_replace_callback(
-                       '!(?:                            # Start cases
-                               (<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
-                               (<.*?>) |                    # m[2]: Skip stuff inside
-                                                            #       HTML elements' . "
-                               (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
-                                                            # m[4]: Post-protocol path
-                               \b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
+                       '!(?:                        # Start cases
+                               (<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
+                               (<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
+                               (\b                        # m[3]: Free external links
+                                       (?i:$prots)
+                                       ($addr$urlChar*)         # m[4]: Post-protocol path
+                               ) |
+                               \b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
                                        ([0-9]+)\b |
-                               \bISBN $spaces (             # m[6]: ISBN, capture number
+                               \bISBN $spaces (           # m[6]: ISBN, capture number
                                        (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
                                        (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
                                        [0-9Xx]                  #  check digit
                                )\b
-                       )!xu", [ &$this, 'magicLinkCallback' ], $text );
+                       )!xu", [ $this, 'magicLinkCallback' ], $text );
                return $text;
        }
 
@@ -2467,7 +2488,7 @@ class Parser {
         *
         * @private
         *
-        * @param int $index
+        * @param string $index Magic variable identifier as mapped in MagicWord::$mVariableIDs
         * @param bool|PPFrame $frame
         *
         * @throws MWException
@@ -2486,18 +2507,21 @@ class Parser {
                                . ' called while parsing (no title set)' );
                }
 
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+
                /**
                 * Some of these require message or data lookups and can be
                 * expensive to check many times.
                 */
-               if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
+               if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) {
                        if ( isset( $this->mVarCache[$index] ) ) {
                                return $this->mVarCache[$index];
                        }
                }
 
                $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
-               Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );
+               Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
 
                $pageLang = $this->getFunctionLang();
 
@@ -2810,7 +2834,7 @@ class Parser {
                                $ret = null;
                                Hooks::run(
                                        'ParserGetVariableValueSwitch',
-                                       [ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
+                                       [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
                                );
 
                                return $ret;
@@ -3354,7 +3378,10 @@ class Parser {
                        throw new MWException( "Tag hook for $function is not callable\n" );
                }
 
-               $allArgs = [ &$this ];
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+
+               $allArgs = [ &$parser ];
                if ( $flags & self::SFH_OBJECT_ARGS ) {
                        # Convert arguments to PPNodes and collect for appending to $allArgs
                        $funcArgs = [];
@@ -3863,7 +3890,9 @@ class Parser {
                                        throw new MWException( "Tag hook for $name is not callable\n" );
                                }
 
-                               $output = call_user_func_array( $callback, [ &$this, $frame, $content, $attributes ] );
+                               // Avoid PHP 7.1 warning from passing $this by reference
+                               $parser = $this;
+                               $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
                        } else {
                                $output = '<span class="error">Invalid tag extension name: ' .
                                        htmlspecialchars( $name ) . '</span>';
@@ -4438,6 +4467,9 @@ class Parser {
                $this->startParse( $title, $options, self::OT_WIKI, $clearState );
                $this->setUser( $user );
 
+               // Strip U+0000 NULL (T159174)
+               $text = str_replace( "\000", '', $text );
+
                // We still normalize line endings for backwards-compatibility
                // with other code that just calls PST, but this should already
                // be handled in TextContent subclasses
@@ -4966,7 +4998,9 @@ class Parser {
                }
                $ig->setAdditionalOptions( $params );
 
-               Hooks::run( 'BeforeParserrenderImageGallery', [ &$this, &$ig ] );
+               // Avoid PHP 7.1 warning from passing $this by reference
+               $parser = $this;
+               Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );
 
                $lines = StringUtils::explode( "\n", $text );
                foreach ( $lines as $line ) {