X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=8db1fe3794c10357ef5e20a7e47fa8575de2f32d;hp=79fc1722f98e614996d3e30413e2661618396fd8;hb=22806b0a4509e97b56fb52b387e17e3c80fb7eb2;hpb=350dcb4cd7990daa4ea8fe4c8335f9353f442605 diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 79fc1722f9..8db1fe3794 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -89,13 +89,15 @@ class Parser { # Everything except bracket, space, or control characters # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 # as well as U+3000 is IDEOGRAPHIC SPACE for T21052 - const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; + # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM + # uses to replace invalid HTML characters. + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]'; # Simplified expression to match an IPv4 or IPv6 address, or # at least one character of a host name (embeds EXT_LINK_URL_CLASS) - const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])'; + const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])'; # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR) // @codingStandardsIgnoreStart Generic.Files.LineLength - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+) + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+) \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; // @codingStandardsIgnoreEnd @@ -264,7 +266,7 @@ class Parser { $this->mUrlProtocols = wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . self::EXT_LINK_ADDR . - self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; + self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } elseif ( defined( 'HPHP_VERSION' ) ) { @@ -330,7 +332,9 @@ class Parser { CoreTagHooks::register( $this ); $this->initialiseVariables(); - Hooks::run( 'ParserFirstCallInit', [ &$this ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'ParserFirstCallInit', [ &$parser ] ); } /** @@ -381,7 +385,9 @@ class Parser { $this->mProfiler = new SectionProfiler(); - Hooks::run( 'ParserClearState', [ &$this ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'ParserClearState', [ &$parser ] ); } /** @@ -413,6 +419,8 @@ class Parser { $text = strtr( $text, "\x7f", "?" ); $magicScopeVariable = $this->lock(); } + // Strip U+0000 NULL (T159174) + $text = str_replace( "\000", '', $text ); $this->startParse( $title, $options, self::OT_HTML, $clearState ); @@ -435,11 +443,13 @@ class Parser { $this->mRevisionSize = null; } - Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] ); # No more strip! - Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] ); + Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] ); $text = $this->internalParse( $text ); - Hooks::run( 'ParserAfterParse', [ &$this, &$text, &$this->mStripState ] ); + Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] ); $text = $this->internalParseHalfParsed( $text, true, $linestart ); @@ -615,8 +625,10 @@ class Parser { * @return string UNSAFE half-parsed HTML */ public function recursiveTagParse( $text, $frame = false ) { - Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] ); - Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] ); + Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] ); $text = $this->internalParse( $text, false, $frame ); return $text; } @@ -663,8 +675,10 @@ class Parser { if ( $revid !== null ) { $this->mRevisionId = $revid; } - Hooks::run( 'ParserBeforeStrip', [ &$this, &$text, &$this->mStripState ] ); - Hooks::run( 'ParserAfterStrip', [ &$this, &$text, &$this->mStripState ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] ); + Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] ); $text = $this->replaceVariables( $text, $frame ); $text = $this->mStripState->unstripBoth( $text ); return $text; @@ -1259,8 +1273,11 @@ class Parser { $origText = $text; + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + # Hook to suspend the parser in this state - if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$this, &$text, &$this->mStripState ] ) ) { + if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) { return $text; } @@ -1280,16 +1297,16 @@ class Parser { $text = $this->replaceVariables( $text ); } - Hooks::run( 'InternalParseBeforeSanitize', [ &$this, &$text, &$this->mStripState ] ); + Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] ); $text = Sanitizer::removeHTMLtags( $text, - [ &$this, 'attributeStripCallback' ], + [ $this, 'attributeStripCallback' ], false, array_keys( $this->mTransparentTagHooks ), [], - [ &$this, 'addTrackingCategory' ] + [ $this, 'addTrackingCategory' ] ); - Hooks::run( 'InternalParseBeforeLinks', [ &$this, &$text, &$this->mStripState ] ); + Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] ); # Tables need to come after variable replacement for things to work # properly; putting them before other transformations should keep @@ -1328,8 +1345,11 @@ class Parser { private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) { $text = $this->mStripState->unstripGeneral( $text ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + if ( $isMain ) { - Hooks::run( 'ParserAfterUnstrip', [ &$this, &$text ] ); + Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] ); } # Clean up special characters, only run once, next-to-last before doBlockLevels @@ -1368,7 +1388,7 @@ class Parser { $text = $this->mStripState->unstripNoWiki( $text ); if ( $isMain ) { - Hooks::run( 'ParserBeforeTidy', [ &$this, &$text ] ); + Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] ); } $text = $this->replaceTransparentTags( $text ); @@ -1409,7 +1429,7 @@ class Parser { } if ( $isMain ) { - Hooks::run( 'ParserAfterTidy', [ &$this, &$text ] ); + Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] ); } return $text; @@ -1434,20 +1454,21 @@ class Parser { $spdash = "(?:-|$space)"; # a dash or a non-newline space $spaces = "$space++"; # possessive match of 1 or more spaces $text = preg_replace_callback( - '!(?: # Start cases - (].*?) | # m[1]: Skip link text - (<.*?>) | # m[2]: Skip stuff inside - # HTML elements' . " - (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links - # m[4]: Post-protocol path - \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number + '!(?: # Start cases + (].*?) | # m[1]: Skip link text + (<.*?>) | # m[2]: Skip stuff inside HTML elements' . " + (\b # m[3]: Free external links + (?i:$prots) + ($addr$urlChar*) # m[4]: Post-protocol path + ) | + \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number ([0-9]+)\b | - \bISBN $spaces ( # m[6]: ISBN, capture number + \bISBN $spaces ( # m[6]: ISBN, capture number (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters [0-9Xx] # check digit )\b - )!xu", [ &$this, 'magicLinkCallback' ], $text ); + )!xu", [ $this, 'magicLinkCallback' ], $text ); return $text; } @@ -1947,18 +1968,6 @@ class Parser { return $attribs; } - /** - * Replace unusual escape codes in a URL with their equivalent characters - * - * @deprecated since 1.24, use normalizeLinkUrl - * @param string $url - * @return string - */ - public static function replaceUnusualEscapes( $url ) { - wfDeprecated( __METHOD__, '1.24' ); - return self::normalizeLinkUrl( $url ); - } - /** * Replace unusual escape codes in a URL with their equivalent characters * @@ -2213,7 +2222,7 @@ class Parser { continue; } - $origLink = $m[1]; + $origLink = ltrim( $m[1], ' ' ); # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these @@ -2479,7 +2488,7 @@ class Parser { * * @private * - * @param int $index + * @param string $index Magic variable identifier as mapped in MagicWord::$mVariableIDs * @param bool|PPFrame $frame * * @throws MWException @@ -2498,18 +2507,21 @@ class Parser { . ' called while parsing (no title set)' ); } + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + /** * Some of these require message or data lookups and can be * expensive to check many times. */ - if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) { + if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) { if ( isset( $this->mVarCache[$index] ) ) { return $this->mVarCache[$index]; } } $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); - Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] ); + Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] ); $pageLang = $this->getFunctionLang(); @@ -2822,7 +2834,7 @@ class Parser { $ret = null; Hooks::run( 'ParserGetVariableValueSwitch', - [ &$this, &$this->mVarCache, &$index, &$ret, &$frame ] + [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ] ); return $ret; @@ -3366,7 +3378,10 @@ class Parser { throw new MWException( "Tag hook for $function is not callable\n" ); } - $allArgs = [ &$this ]; + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + + $allArgs = [ &$parser ]; if ( $flags & self::SFH_OBJECT_ARGS ) { # Convert arguments to PPNodes and collect for appending to $allArgs $funcArgs = []; @@ -3875,7 +3890,9 @@ class Parser { throw new MWException( "Tag hook for $name is not callable\n" ); } - $output = call_user_func_array( $callback, [ &$this, $frame, $content, $attributes ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] ); } else { $output = 'Invalid tag extension name: ' . htmlspecialchars( $name ) . ''; @@ -4450,6 +4467,9 @@ class Parser { $this->startParse( $title, $options, self::OT_WIKI, $clearState ); $this->setUser( $user ); + // Strip U+0000 NULL (T159174) + $text = str_replace( "\000", '', $text ); + // We still normalize line endings for backwards-compatibility // with other code that just calls PST, but this should already // be handled in TextContent subclasses @@ -4978,7 +4998,9 @@ class Parser { } $ig->setAdditionalOptions( $params ); - Hooks::run( 'BeforeParserrenderImageGallery', [ &$this, &$ig ] ); + // Avoid PHP 7.1 warning from passing $this by reference + $parser = $this; + Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] ); $lines = StringUtils::explode( "\n", $text ); foreach ( $lines as $line ) {