merge latest master into Wikidata branch
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 8e9444a..1902443 100644 (file)
@@ -163,7 +163,8 @@ class Parser {
        var $mLinkHolders;
 
        var $mLinkID;
-       var $mIncludeSizes, $mPPNodeCount, $mHighestExpansionDepth, $mDefaultSort;
+       var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
+       var $mDefaultSort;
        var $mTplExpandCache; # empty-frame expansion cache
        var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
        var $mExpensiveFunctionCount; # number of expensive parser function calls
@@ -199,6 +200,13 @@ class Parser {
         */
        var $mUniqPrefix;
 
+       /**
+        * @var Array keyed by the interwiki prefix (i.e. the language code) of
+        * each language link already seen; the values are arbitrary. Used to
+        * avoid sending duplicate language links to the ParserOutput.
+        */
+       var $mLangLinkLanguages;
+
        /**
         * Constructor
         *
@@ -207,7 +215,7 @@ class Parser {
        public function __construct( $conf = array() ) {
                $this->mConf = $conf;
                $this->mUrlProtocols = wfUrlProtocols();
-               $this->mExtLinkBracketedRegex = '/\[((' . $this->mUrlProtocols . ')'.
+               $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')'.
                        self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
                if ( isset( $conf['preprocessorClass'] ) ) {
                        $this->mPreprocessorClass = $conf['preprocessorClass'];
@@ -281,6 +289,7 @@ class Parser {
                        $this->mRevisionId = $this->mRevisionUser = null;
                $this->mVarCache = array();
                $this->mUser = null;
+               $this->mLangLinkLanguages = array();
 
                /**
                 * Prefix for temporary replacement strings for the multipass parser.
@@ -306,6 +315,7 @@ class Parser {
                        'arg' => 0,
                );
                $this->mPPNodeCount = 0;
+               $this->mGeneratedPPNodeCount = 0;
                $this->mHighestExpansionDepth = 0;
                $this->mDefaultSort = false;
                $this->mHeadings = array();
@@ -339,7 +349,7 @@ class Parser {
                 * to internalParse() which does all the real work.
                 */
 
-               global $wgUseTidy, $wgAlwaysUseTidy, $wgDisableLangConversion, $wgDisableTitleConversion;
+               global $wgUseTidy, $wgAlwaysUseTidy;
                $fname = __METHOD__.'-' . wfGetCaller();
                wfProfileIn( __METHOD__ );
                wfProfileIn( $fname );
@@ -392,9 +402,8 @@ class Parser {
                 * c) It's a conversion table
                 * d) it is an interface message (which is in the user language)
                 */
-               if ( !( $wgDisableLangConversion
-                               || isset( $this->mDoubleUnderscores['nocontentconvert'] )
-                               || $this->mTitle->isConversionTable() ) )
+               if ( !( $options->getDisableContentConversion()
+                               || isset( $this->mDoubleUnderscores['nocontentconvert'] ) ) )
                {
                        # Run convert unconditionally in 1.18-compatible mode
                        global $wgBug34832TransitionalRollback;
@@ -413,8 +422,7 @@ class Parser {
                 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
                 * automatic link conversion.
                 */
-               if ( !( $wgDisableLangConversion
-                               || $wgDisableTitleConversion
+               if ( !( $options->getDisableTitleConversion()
                                || isset( $this->mDoubleUnderscores['nocontentconvert'] )
                                || isset( $this->mDoubleUnderscores['notitleconvert'] )
                                || $this->mOutput->getDisplayTitle() !== false ) )
@@ -482,13 +490,20 @@ class Parser {
                        $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/{$this->mOptions->getExpensiveParserFunctionLimit()}\n";
                        $limitReport =
                                "NewPP limit report\n" .
-                               "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
+                               "Preprocessor visited node count: {$this->mPPNodeCount}/{$this->mOptions->getMaxPPNodeCount()}\n" .
+                               "Preprocessor generated node count: " .
+                                       "{$this->mGeneratedPPNodeCount}/{$this->mOptions->getMaxGeneratedPPNodeCount()}\n" .
                                "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
                                "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
                                "Highest expansion depth: {$this->mHighestExpansionDepth}/{$this->mOptions->getMaxPPExpandDepth()}\n".
                                $PFreport;
                        wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
                        $text .= "\n<!-- \n$limitReport-->\n";
+
+                       if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
+                               wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
+                                       $this->mTitle->getPrefixedDBkey() );
+                       }
                }
                $this->mOutput->setText( $text );
 
@@ -1187,7 +1202,7 @@ class Parser {
                        '!(?:                           # Start cases
                                (<a[ \t\r\n>].*?</a>) |     # m[1]: Skip link text
                                (<.*?>) |                   # m[2]: Skip stuff inside HTML elements' . "
-                               (\\b(?:$prots)$urlChar+) |  # m[3]: Free external links" . '
+                               (\\b(?i:$prots)$urlChar+) |  # m[3]: Free external links" . '
                                (?:RFC|PMID)\s+([0-9]+) |   # m[4]: RFC or PMID, capture number
                                ISBN\s+(\b                  # m[5]: ISBN, capture number
                                        (?: 97[89] [\ \-]? )?   # optional 13-digit ISBN prefix
@@ -1853,7 +1868,7 @@ class Parser {
                        # Don't allow internal links to pages containing
                        # PROTO: where PROTO is a valid URL protocol; these
                        # should be external links.
-                       if ( preg_match( '/^(?:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
+                       if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $m[1] ) ) {
                                $s .= $prefix . '[[' . $line ;
                                wfProfileOut( __METHOD__."-misc" );
                                continue;
@@ -1946,7 +1961,14 @@ class Parser {
                                # Interwikis
                                wfProfileIn( __METHOD__."-interwiki" );
                                if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && Language::fetchLanguageName( $iw, null, 'mw' ) ) {
-                                       $this->mOutput->addLanguageLink( $nt->getFullText() );
+                                       // XXX: the above check prevents links to sites with identifiers that are not language codes
+
+                                       # Bug 24502: filter duplicates
+                                       if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
+                                               $this->mLangLinkLanguages[$iw] = true;
+                                               $this->mOutput->addLanguageLink( $nt->getFullText() );
+                                       }
+
                                        $s = rtrim( $s . $prefix );
                                        $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
                                        wfProfileOut( __METHOD__."-interwiki" );
@@ -2090,7 +2112,7 @@ class Parser {
         * @return String: less-or-more HTML with NOPARSE bits
         */
        function armorLinks( $text ) {
-               return preg_replace( '/\b(' . $this->mUrlProtocols . ')/',
+               return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
                        "{$this->mUniqPrefix}NOPARSE$1", $text );
        }
 
@@ -3586,7 +3608,13 @@ class Parser {
                        }
 
                        if ( $rev ) {
-                               $text = $rev->getText();
+                               $content = $rev->getContent();
+                               $text = $content->getWikitextForTransclusion();
+
+                               if ( $text === false || $text === null ) {
+                                       $text = false;
+                                       break;
+                               }
                        } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                global $wgContLang;
                                $message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
@@ -3594,16 +3622,17 @@ class Parser {
                                        $text = false;
                                        break;
                                }
+                               $content = $message->content();
                                $text = $message->plain();
                        } else {
                                break;
                        }
-                       if ( $text === false ) {
+                       if ( !$content ) {
                                break;
                        }
                        # Redirect?
                        $finalTitle = $title;
-                       $title = Title::newFromRedirect( $text );
+                       $title = $content->getRedirectTarget();
                }
                return array(
                        'text' => $text,
@@ -3692,8 +3721,13 @@ class Parser {
                        return $obj->tc_contents;
                }
 
-               $text = Http::get( $url );
-               if ( !$text ) {
+               $req = MWHttpRequest::factory( $url );
+               $status = $req->execute(); // Status object
+               if ( $status->isOK() ) {
+                       $text = $req->getContent();
+               } elseif ( $req->getStatus() != 200 ) { // Though we failed to fetch the content, this status is useless.
+                       return wfMessage( 'scarytranscludefailed-httpstatus', $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
+               } else {
                        return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
                }
 
@@ -4137,10 +4171,16 @@ class Parser {
                        $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
 
                        # Strip out HTML (first regex removes any tag not allowed)
-                       # Allowed tags are <sup> and <sub> (bug 8393), <i> (bug 26375) and <b> (r105284)
-                       # We strip any parameter from accepted tags (second regex)
+                       # Allowed tags are:
+                       # * <sup> and <sub> (bug 8393)
+                       # * <i> (bug 26375)
+                       # * <b> (r105284)
+                       # * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
+                       #
+                       # We strip any attribute from accepted tags (second regex), except dir="rtl|ltr" on <span>,
+                       # to allow setting directionality in TOC items.
                        $tocline = preg_replace(
-                               array( '#<(?!/?(sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(sup|sub|i|b))(?: .*?)?'.'>#' ),
+                               array( '#<(?!/?(span|sup|sub|i|b)(?: [^>]*)?>).*?'.'>#', '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|i|b))(?: .*?)?'.'>#' ),
                                array( '',                          '<$1>' ),
                                $safeHeadline
                        );
@@ -5095,8 +5135,8 @@ class Parser {
                                                                $paramName = 'no-link';
                                                                $value = true;
                                                                $validated = true;
-                                                       } elseif ( preg_match( "/^$prots/", $value ) ) {
-                                                               if ( preg_match( "/^($prots)$chars+$/u", $value, $m ) ) {
+                                                       } elseif ( preg_match( "/^(?i)$prots/", $value ) ) {
+                                                               if ( preg_match( "/^((?i)$prots)$chars+$/u", $value, $m ) ) {
                                                                        $paramName = 'link-url';
                                                                        $this->mOutput->addExternalLink( $value );
                                                                        if ( $this->mOptions->getExternalLinkTarget() ) {
@@ -5622,7 +5662,7 @@ class Parser {
                # @todo FIXME: Not tolerant to blank link text
                # I.E. [http://www.mediawiki.org] will render as [1] or something depending
                # on how many empty links there are on the page - need to figure that out.
-               $text = preg_replace( '/\[(?:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
+               $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
 
                # Parse wikitext quotes (italics & bold)
                $text = $this->doQuotes( $text );