X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FParser.php;h=4daa8f2956120db90c1f3619c5a1bbba13eb62fa;hb=328e3473d61a11358d23017597672e7c1b2b44c8;hp=418792a8e44195d94ea858fccdceaf89eed75b15;hpb=eb76c1f16cb5e259e087d4045b80ce5528adef6f;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index 418792a8e4..4daa8f2956 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1,15 +1,15 @@ * - * @addtogroup Parser + * @ingroup Parser */ class Parser { @@ -82,14 +82,17 @@ class Parser const OT_WIKI = 2; const OT_PREPROCESS = 3; const OT_MSG = 3; - + + // Marker Suffix needs to be accessible staticly. + const MARKER_SUFFIX = "-QINU\x7f"; + /**#@+ * @private */ # Persistent: var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables, - $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix, - $mExtLinkBracketedRegex, $mPreprocessor, $mDefaultStripList, $mVarCache, $mConf; + $mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor, + $mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf; # Cleared with clearState(): @@ -98,7 +101,8 @@ class Parser var $mInterwikiLinkHolders, $mLinkHolders; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; // empty-frame expansion cache - var $mTplRedirCache, $mTplDomCache, $mHeadings; + var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; + var $mExpensiveFunctionCount; // number of expensive parser function calls # Temporary # These are variables reset at least once per parse regardless of $clearState @@ -124,18 +128,18 @@ class Parser $this->mFunctionHooks = array(); $this->mFunctionSynonyms = array( 0 => array(), 1 => array() ); $this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' ); - $this->mMarkerSuffix = "-QINU\x7f"; $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. '[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S'; $this->mVarCache = array(); if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; } else { - $this->mPreprocessorClass = 'Preprocessor_DOM'; + $this->mPreprocessorClass = 'Preprocessor_Hash'; } + $this->mMarkerIndex = 0; $this->mFirstCall = true; } - + /** * Do various kinds of initialisation on the first call of the parser */ @@ -144,53 +148,11 @@ class Parser return; } $this->mFirstCall = false; - + wfProfileIn( __METHOD__ ); - global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions; $this->setHook( 'pre', array( $this, 'renderPreTag' ) ); - - # Syntax for arguments (see self::setFunctionHook): - # "name for lookup in localized magic words array", - # function callback, - # optional SFH_NO_HASH to omit the hash from calls (e.g. {{int:...} - # instead of {{#int:...}}) - $this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH ); - $this->setFunctionHook( 'ns', array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH ); - $this->setFunctionHook( 'urlencode', array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH ); - $this->setFunctionHook( 'lcfirst', array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( 'ucfirst', array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH ); - $this->setFunctionHook( 'lc', array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH ); - $this->setFunctionHook( 'uc', array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH ); - $this->setFunctionHook( 'localurl', array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH ); - $this->setFunctionHook( 'localurle', array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH ); - $this->setFunctionHook( 'fullurl', array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH ); - $this->setFunctionHook( 'fullurle', array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH ); - $this->setFunctionHook( 'formatnum', array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH ); - $this->setFunctionHook( 'grammar', array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH ); - $this->setFunctionHook( 'plural', array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofpages', array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofusers', array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberoffiles', array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofadmins', array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH ); - $this->setFunctionHook( 'numberofedits', array( 'CoreParserFunctions', 'numberofedits' ), SFH_NO_HASH ); - $this->setFunctionHook( 'language', array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH ); - $this->setFunctionHook( 'padleft', array( 'CoreParserFunctions', 'padleft' ), SFH_NO_HASH ); - $this->setFunctionHook( 'padright', array( 'CoreParserFunctions', 'padright' ), SFH_NO_HASH ); - $this->setFunctionHook( 'anchorencode', array( 'CoreParserFunctions', 'anchorencode' ), SFH_NO_HASH ); - $this->setFunctionHook( 'special', array( 'CoreParserFunctions', 'special' ) ); - $this->setFunctionHook( 'defaultsort', array( 'CoreParserFunctions', 'defaultsort' ), SFH_NO_HASH ); - $this->setFunctionHook( 'filepath', array( 'CoreParserFunctions', 'filepath' ), SFH_NO_HASH ); - $this->setFunctionHook( 'tag', array( 'CoreParserFunctions', 'tagObj' ), SFH_OBJECT_ARGS ); - - if ( $wgAllowDisplayTitle ) { - $this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH ); - } - if ( $wgAllowSlowParserFunctions ) { - $this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH ); - } - + CoreParserFunctions::register( $this ); $this->initialiseVariables(); wfRunHooks( 'ParserFirstCallInit', array( &$this ) ); @@ -235,13 +197,14 @@ class Parser * since it shouldn't match when butted up against identifier-like * string constructs. * - * Must not consist of all title characters, or else it will change + * Must not consist of all title characters, or else it will change * the behaviour of in a link. */ #$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString(); # Changed to \x7f to allow XML double-parsing -- TS $this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString(); + # Clear these on every parse, bug 4549 $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array(); @@ -254,6 +217,8 @@ class Parser $this->mPPNodeCount = 0; $this->mDefaultSort = false; $this->mHeadings = array(); + $this->mDoubleUnderscores = array(); + $this->mExpensiveFunctionCount = 0; # Fix cloning if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) { @@ -357,6 +322,7 @@ class Parser '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2', # french spaces, Guillemet-right '/(\\302\\253) /' => '\\1 ', + '/ (!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874. ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); @@ -426,17 +392,24 @@ class Parser array_values( $tidyregs ), $text ); } + global $wgExpensiveParserFunctionLimit; + if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) { + $this->limitationWarn( 'expensive-parserfunction', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit ); + } wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) ); # Information on include size limits, for the benefit of users who try to skirt them if ( $this->mOptions->getEnableLimitReport() ) { + global $wgExpensiveParserFunctionLimit; $max = $this->mOptions->getMaxIncludeSize(); - $limitReport = - "NewPP limit report\n" . + $PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n"; + $limitReport = + "NewPP limit report\n" . "Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" . "Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" . - "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n"; + "Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n". + $PFreport; wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) ); $text .= "\n\n"; } @@ -495,10 +468,17 @@ class Parser function &getTitle() { return $this->mTitle; } function getOptions() { return $this->mOptions; } + function getRevisionId() { return $this->mRevisionId; } function getFunctionLang() { global $wgLang, $wgContLang; - return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; + + $target = $this->mOptions->getTargetLanguage(); + if ( $target !== null ) { + return $target; + } else { + return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; + } } /** @@ -514,7 +494,7 @@ class Parser /** * Replaces all occurrences of HTML-style comments and the given tags - * in the text with a random marker and returns teh next text. The output + * in the text with a random marker and returns the next text. The output * parameter $matches will be an associative array filled with data in * the form: * 'UNIQ-xxxxx' => array( @@ -558,7 +538,7 @@ class Parser $inside = $p[4]; } - $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . $this->mMarkerSuffix; + $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . self::MARKER_SUFFIX; $stripped .= $marker; if ( $close === '/>' ) { @@ -650,9 +630,8 @@ class Parser * @private */ function insertStripItem( $text ) { - static $n = 0; - $rnd = "{$this->mUniqPrefix}-item-$n-{$this->mMarkerSuffix}"; - ++$n; + $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX; + $this->mMarkerIndex++; $this->mStripState->general->setPair( $rnd, $text ); return $rnd; } @@ -737,7 +716,7 @@ class Parser /** * Use the HTML tidy PECL extension to use the tidy library in-process, - * saving the overhead of spawning a new process. + * saving the overhead of spawning a new process. * * 'pear install tidy' should be able to compile the extension module. * @@ -760,8 +739,8 @@ class Parser $cleansource = tidy_get_output( $tidy ); } if ( $wgDebugTidy && $tidy->getStatus() > 0 ) { - $cleansource .= "', '-->', $tidy->errorBuffer ) . + $cleansource .= "', '-->', $tidy->errorBuffer ) . "\n-->"; } @@ -811,7 +790,7 @@ class Parser } else if ( count ( $td_history ) == 0 ) { // Don't do any of the following continue; - } else if ( substr ( $line , 0 , 2 ) == '|}' ) { + } else if ( substr ( $line , 0 , 2 ) == '|}' ) { // We are ending a table $line = '' . substr ( $line , 2 ); $last_tag = array_pop ( $last_tag_history ); @@ -990,11 +969,10 @@ class Parser $text = preg_replace( '/(^|\n)-----*/', '\\1
', $text ); - $text = $this->stripToc( $text ); - $this->stripNoGallery( $text ); + $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); if($this->mOptions->getUseDynamicDates()) { - $df =& DateFormatter::getInstance(); + $df = DateFormatter::getInstance(); $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); } $text = $this->doAllQuotes( $text ); @@ -1046,9 +1024,9 @@ class Parser ' ' => '', 'x' => 'X', )); - $titleObj = SpecialPage::getTitleFor( 'Booksources' ); + $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); $text = 'escapeLocalUrl() . "\" class=\"internal\">ISBN $isbn"; } else { if ( substr( $m[0], 0, 3 ) == 'RFC' ) { @@ -1536,6 +1514,8 @@ class Parser } if( is_null( $this->mTitle ) ) { + wfProfileOut( $fname ); + wfProfileOut( $fname.'-setup' ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1625,6 +1605,7 @@ class Parser # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; + wfProfileOut( "$fname-misc" ); continue; } @@ -1771,7 +1752,14 @@ class Parser # Special and Media are pseudo-namespaces; no pages actually exist in them if( $ns == NS_MEDIA ) { - $link = $sk->makeMediaLinkObj( $nt, $text ); + # Give extensions a chance to select the file revision for us + $skip = $time = false; + wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); + if ( $skip ) { + $link = $sk->makeLinkObj( $nt ); + } else { + $link = $sk->makeMediaLinkObj( $nt, $text, $time ); + } # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( $link ) . $trail; $this->mOutput->addImage( $nt->getDBkey() ); @@ -1878,8 +1866,7 @@ class Parser */ function areSubpagesAllowed() { # Some namespaces don't allow subpages - global $wgNamespacesWithSubpages; - return !empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]); + return MWNamespace::hasSubpages( $this->mTitle->getNamespace() ); } /** @@ -2397,7 +2384,7 @@ class Parser $oldtz = getenv( 'TZ' ); putenv( 'TZ='.$wgLocaltimezone ); } - + wfSuppressWarnings(); // E_STRICT system time bitching $localTimestamp = date( 'YmdHis', $ts ); $localMonth = date( 'm', $ts ); @@ -2606,22 +2593,22 @@ class Parser /** * Preprocess some wikitext and return the document tree. - * This is the ghost of replace_variables(). + * This is the ghost of replace_variables(). * * @param string $text The text to parse * @param integer flags Bitwise combination of: - * self::PTD_FOR_INCLUSION Handle / as if the text is being - * included. Default is to assume a direct page view. + * self::PTD_FOR_INCLUSION Handle / as if the text is being + * included. Default is to assume a direct page view. * * The generated DOM tree must depend only on the input text and the flags. - * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. * - * Any flag added to the $flags parameter here, or any other parameter liable to cause a - * change in the DOM tree for a given text, must be passed through the section identifier - * in the section edit link and thus back to extractSections(). + * Any flag added to the $flags parameter here, or any other parameter liable to cause a + * change in the DOM tree for a given text, must be passed through the section identifier + * in the section edit link and thus back to extractSections(). * - * The output of this function is currently only cached in process memory, but a persistent - * cache may be implemented at a later date which takes further advantage of these strict + * The output of this function is currently only cached in process memory, but a persistent + * cache may be implemented at a later date which takes further advantage of these strict * dependency requirements. * * @private @@ -2631,7 +2618,7 @@ class Parser return $dom; } - /* + /* * Return a three-element array: leading whitespace, string contents, trailing whitespace */ public static function splitWhitespace( $s ) { @@ -2708,6 +2695,28 @@ class Parser return $assocArgs; } + /** + * Warn the user when a parser limitation is reached + * Will warn at most once the user per limitation type + * + * @param string $limitationType, should be one of: + * 'expensive-parserfunction' (corresponding messages: 'expensive-parserfunction-warning', 'expensive-parserfunction-category') + * 'post-expand-template-argument' (corresponding messages: 'post-expand-template-argument-warning', 'post-expand-template-argument-category') + * 'post-expand-template-inclusion' (corresponding messages: 'post-expand-template-inclusion-warning', 'post-expand-template-inclusion-category') + * @params int $current, $max When an explicit limit has been + * exceeded, provide the values (optional) + */ + function limitationWarn( $limitationType, $current=null, $max=null) { + $msgName = $limitationType . '-warning'; + //does no harm if $current and $max are present but are unnecessary for the message + $warning = wfMsg( $msgName, $current, $max); + $this->mOutput->addWarning( $warning ); + $cat = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( $limitationType . '-category' ) ); + if ( $cat ) { + $this->mOutput->addCategory( $cat->getDBkey(), $this->getDefaultSort() ); + } + } + /** * Return the text of a template, after recursively * replacing any variables or templates within the template. @@ -2737,8 +2746,8 @@ class Parser # Title object, where $text came from $title = NULL; - # $part1 is the bit before the first |, and must contain only title characters. - # Various prefixes will be stripped from it later. + # $part1 is the bit before the first |, and must contain only title characters. + # Various prefixes will be stripped from it later. $titleWithSpaces = $frame->expand( $piece['title'] ); $part1 = trim( $titleWithSpaces ); $titleText = false; @@ -2753,7 +2762,7 @@ class Parser # SUBST wfProfileIn( __METHOD__.'-modifiers' ); if ( !$found ) { - $mwSubst =& MagicWord::get( 'subst' ); + $mwSubst = MagicWord::get( 'subst' ); if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) { # One of two possibilities is true: # 1) Found SUBST but not in the PST phase @@ -2779,17 +2788,17 @@ class Parser # MSG, MSGNW and RAW if ( !$found ) { # Check for MSGNW: - $mwMsgnw =& MagicWord::get( 'msgnw' ); + $mwMsgnw = MagicWord::get( 'msgnw' ); if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: - $mwMsg =& MagicWord::get( 'msg' ); + $mwMsg = MagicWord::get( 'msg' ); $mwMsg->matchStartAndRemove( $part1 ); } # Check for RAW: - $mwRaw =& MagicWord::get( 'raw' ); + $mwRaw = MagicWord::get( 'raw' ); if ( $mwRaw->matchStartAndRemove( $part1 ) ) { $forceRawInterwiki = true; } @@ -2841,7 +2850,9 @@ class Parser } $result = call_user_func_array( $callback, $allArgs ); $found = true; - + $noparse = true; + $preprocessFlags = 0; + if ( is_array( $result ) ) { if ( isset( $result[0] ) ) { $text = $result[0]; @@ -2854,6 +2865,10 @@ class Parser } else { $text = $result; } + if ( !$noparse ) { + $text = $this->preprocessToDom( $text, $preprocessFlags ); + $isChildObj = true; + } } } wfProfileOut( __METHOD__ . '-pfunc' ); @@ -2983,11 +2998,12 @@ class Parser elseif ( is_string( $text ) && !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ $text = "\n" . $text; } - + if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { # Error, oversize inclusion - $text = "[[$originalTitle]]" . + $text = "[[$originalTitle]]" . $this->insertStripItem( '' ); + $this->limitationWarn( 'post-expand-template-inclusion' ); } if ( $isLocalObj ) { @@ -3007,7 +3023,7 @@ class Parser function getTemplateDom( $title ) { $cacheTitle = $title; $titleText = $title->getPrefixedDBkey(); - + if ( isset( $this->mTplRedirCache[$titleText] ) ) { list( $ns, $dbk ) = $this->mTplRedirCache[$titleText]; $title = Title::makeTitle( $ns, $dbk ); @@ -3029,7 +3045,7 @@ class Parser $this->mTplDomCache[ $titleText ] = $dom; if (! $title->equals($cacheTitle)) { - $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = + $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = array( $title->getNamespace(),$cdb = $title->getDBkey() ); } @@ -3041,7 +3057,7 @@ class Parser */ function fetchTemplateAndTitle( $title ) { $templateCb = $this->mOptions->getTemplateCallback(); - $stuff = call_user_func( $templateCb, $title ); + $stuff = call_user_func( $templateCb, $title, $this ); $text = $stuff['text']; $finalTitle = isset( $stuff['finalTitle'] ) ? $stuff['finalTitle'] : $title; if ( isset( $stuff['deps'] ) ) { @@ -3061,17 +3077,17 @@ class Parser * Static function to get a template * Can be overridden via ParserOptions::setTemplateCallback(). */ - static function statelessFetchTemplate( $title ) { + static function statelessFetchTemplate( $title, $parser=false ) { $text = $skip = false; $finalTitle = $title; $deps = array(); - + // Loop to fetch the article, with up to 1 redirect for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { # Give extensions a chance to select the revision instead $id = false; // Assume current - wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( false, &$title, &$skip, &$id ) ); - + wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( $parser, &$title, &$skip, &$id ) ); + if( $skip ) { $text = false; $deps[] = array( @@ -3082,10 +3098,15 @@ class Parser } $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); $rev_id = $rev ? $rev->getId() : 0; + // If there is no current revision, there is no page + if( $id === false && !$rev ) { + $linkCache = LinkCache::singleton(); + $linkCache->addBadLinkObj( $title ); + } - $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), + $deps[] = array( + 'title' => $title, + 'page_id' => $title->getArticleID(), 'rev_id' => $rev_id ); if( $rev ) { @@ -3169,10 +3190,10 @@ class Parser $argName = trim( $nameWithSpaces ); $object = false; $text = $frame->getArgument( $argName ); - if ( $text === false && $parts->getLength() > 0 - && ( - $this->ot['html'] - || $this->ot['pre'] + if ( $text === false && $parts->getLength() > 0 + && ( + $this->ot['html'] + || $this->ot['pre'] || ( $this->ot['wiki'] && $frame->isTemplate() ) ) ) { @@ -3181,6 +3202,7 @@ class Parser } if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { $error = ''; + $this->limitationWarn( 'post-expand-template-argument' ); } if ( $text === false && $object === false ) { @@ -3214,14 +3236,13 @@ class Parser */ function extensionSubstitution( $params, $frame ) { global $wgRawHtml, $wgContLang; - static $n = 1; $name = $frame->expand( $params['name'] ); $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); - $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $n++) . $this->mMarkerSuffix; - + $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX; + if ( $this->ot['html'] ) { $name = strtolower( $name ); @@ -3256,7 +3277,8 @@ class Parser $output = call_user_func_array( $this->mTagHooks[$name], array( $content, $attributes, $this ) ); } else { - throw new MWException( "Invalid call hook $name" ); + $output = 'Invalid tag extension name: ' . + htmlspecialchars( $name ) . ''; } } } else { @@ -3302,32 +3324,25 @@ class Parser } /** - * Detect __NOGALLERY__ magic word and set a placeholder + * Increment the expensive function count + * + * @return boolean False if the limit has been exceeded */ - function stripNoGallery( &$text ) { - # if the string __NOGALLERY__ (not case-sensitive) occurs in the HTML, - # do not add TOC - $mw = MagicWord::get( 'nogallery' ); - $this->mOutput->mNoGallery = $mw->matchAndRemove( $text ) ; + function incrementExpensiveFunctionCount() { + global $wgExpensiveParserFunctionLimit; + $this->mExpensiveFunctionCount++; + if($this->mExpensiveFunctionCount <= $wgExpensiveParserFunctionLimit) { + return true; + } + return false; } /** - * Find the first __TOC__ magic word and set a - * placeholder that will then be replaced by the real TOC in - * ->formatHeadings, this works because at this points real - * comments will have already been discarded by the sanitizer. - * - * Any additional __TOC__ magic words left over will be discarded - * as there can only be one TOC on the page. + * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ + * Fills $this->mDoubleUnderscores, returns the modified text */ - function stripToc( $text ) { - # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, - # do not add TOC - $mw = MagicWord::get( 'notoc' ); - if( $mw->matchAndRemove( $text ) ) { - $this->mShowToc = false; - } - + function doDoubleUnderscore( $text ) { + // The position of __TOC__ needs to be recorded $mw = MagicWord::get( 'toc' ); if( $mw->match( $text ) ) { $this->mShowToc = true; @@ -3339,6 +3354,27 @@ class Parser // Only keep the first one. $text = $mw->replace( '', $text ); } + + // Now match and remove the rest of them + $mwa = MagicWord::getDoubleUnderscoreArray(); + $this->mDoubleUnderscores = $mwa->matchAndRemove( $text ); + + if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) { + $this->mOutput->mNoGallery = true; + } + if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) { + $this->mShowToc = false; + } + if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) { + $this->mOutput->setProperty( 'hiddencat', 'y' ); + + $containerCategory = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( 'hidden-category-category' ) ); + if ( $containerCategory ) { + $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); + } else { + wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" ); + } + } return $text; } @@ -3367,8 +3403,7 @@ class Parser } # Inhibit editsection links if requested in the page - $esw =& MagicWord::get( 'noeditsection' ); - if( $esw->matchAndRemove( $text ) ) { + if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { $showEditLink = 0; } @@ -3384,14 +3419,13 @@ class Parser # Allow user to stipulate that a page should have a "new section" # link added via __NEWSECTIONLINK__ - $mw =& MagicWord::get( 'newsectionlink' ); - if( $mw->matchAndRemove( $text ) ) + if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) { $this->mOutput->setNewSection( true ); + } # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, # override above conditions and always show TOC above first header - $mw =& MagicWord::get( 'forcetoc' ); - if ($mw->matchAndRemove( $text ) ) { + if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) { $this->mShowToc = true; $enoughToc = true; } @@ -3415,7 +3449,7 @@ class Parser $prevlevel = 0; $toclevel = 0; $prevtoclevel = 0; - $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-{$this->mMarkerSuffix}"; + $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX; $baseTitleText = $this->mTitle->getPrefixedDBkey(); $tocraw = array(); @@ -3474,6 +3508,7 @@ class Parser if($prevtoclevel < $wgMaxTocLevel) { # Unindent only if the previous toc level was shown :p $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); + $prevtoclevel = $toclevel; } else { $toc .= $sk->tocLineEnd(); } @@ -3532,11 +3567,12 @@ class Parser # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; $safeHeadline = Sanitizer::escapeId( $safeHeadline ); - $refers[$headlineCount] = $safeHeadline; + # HTML names must be case-insensitively unique (bug 10721) + $arrayKey = strtolower( $safeHeadline ); # count how many in assoc. array so we can track dupes in anchors - isset( $refers[$safeHeadline] ) ? $refers[$safeHeadline]++ : $refers[$safeHeadline] = 1; - $refcount[$headlineCount] = $refers[$safeHeadline]; + isset( $refers[$arrayKey] ) ? $refers[$arrayKey]++ : $refers[$arrayKey] = 1; + $refcount[$headlineCount] = $refers[$arrayKey]; # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { @@ -3556,7 +3592,7 @@ class Parser # give headline the correct tag if( $showEditLink && $sectionIndex !== false ) { if( $isTemplate ) { - # Put a T flag in the section identifier, to indicate to extractSections() + # Put a T flag in the section identifier, to indicate to extractSections() # that sections inside should be counted. $editlink = $sk->editSectionLinkForOther($titleText, "T-$sectionIndex"); } else { @@ -3576,7 +3612,7 @@ class Parser if( $numVisible < 1 ) { $enoughToc = false; } - + if( $enoughToc ) { if( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); @@ -3657,16 +3693,20 @@ class Parser * the database, we use $wgContLang here in order to give * everyone the same signature and use the default one rather * than the one selected in each user's preferences. + * + * (see also bug 12815) */ + $ts = $this->mOptions->getTimestamp(); + $tz = 'UTC'; if ( isset( $wgLocaltimezone ) ) { + $unixts = wfTimestamp( TS_UNIX, $ts ); $oldtz = getenv( 'TZ' ); putenv( 'TZ='.$wgLocaltimezone ); - } - $d = $wgContLang->timeanddate( $this->mOptions->getTimestamp(), false, false) . - ' (' . date( 'T' ) . ')'; - if ( isset( $wgLocaltimezone ) ) { + $ts = date( 'YmdHis', $unixts ); + $tz = date( 'T', $unixts ); # might vary on DST changeover! putenv( 'TZ='.$oldtz ); } + $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tz)"; # Variable replacement # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags @@ -3684,7 +3724,7 @@ class Parser # global $wgLegalTitleChars; $tc = "[$wgLegalTitleChars]"; - $nc = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii! + $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii! $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] @@ -3721,11 +3761,11 @@ class Parser */ function getUserSig( &$user ) { global $wgMaxSigChars; - + $username = $user->getName(); $nickname = $user->getOption( 'nickname' ); $nickname = $nickname === '' ? $username : $nickname; - + if( mb_strlen( $nickname ) > $wgMaxSigChars ) { $nickname = $username; wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); @@ -3873,7 +3913,9 @@ class Parser $tag = strtolower( $tag ); $oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null; $this->mTagHooks[$tag] = $callback; - $this->mStripList[] = $tag; + if( !in_array( $tag, $this->mStripList ) ) { + $this->mStripList[] = $tag; + } return $oldVal; } @@ -3972,7 +4014,7 @@ class Parser $colours = array(); $linkcolour_ids = array(); $sk = $this->mOptions->getSkin(); - $linkCache =& LinkCache::singleton(); + $linkCache = LinkCache::singleton(); if ( !empty( $this->mLinkHolders['namespaces'] ) ) { wfProfileIn( $fname.'-check' ); @@ -4011,10 +4053,7 @@ class Parser # Not in the link cache, add it to the query if ( !isset( $current ) ) { $current = $ns; - $query = "SELECT page_id, page_namespace, page_title"; - if ( $threshold > 0 ) { - $query .= ', page_len, page_is_redirect'; - } + $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; } elseif ( $current != $ns ) { $current = $ns; @@ -4039,9 +4078,9 @@ class Parser while ( $s = $dbr->fetchObject($res) ) { $title = Title::makeTitle( $s->page_namespace, $s->page_title ); $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title ); + $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); $this->mOutput->addLink( $title, $s->page_id ); - $colours[$pdbk] = $sk->getLinkColour( $s, $threshold ); + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); //add id to the extension todolist $linkcolour_ids[$s->page_id] = $pdbk; } @@ -4102,10 +4141,7 @@ class Parser // construct query $titleClause = $linkBatch->constructSet('page', $dbr); - $variantQuery = "SELECT page_id, page_namespace, page_title"; - if ( $threshold > 0 ) { - $variantQuery .= ', page_len, page_is_redirect'; - } + $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; $variantQuery .= " FROM $page WHERE $titleClause"; if ( $options & RLH_FOR_UPDATE ) { @@ -4124,7 +4160,7 @@ class Parser $holderKeys = array(); if(isset($variantMap[$varPdbk])){ $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle ); + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); $this->mOutput->addLink( $variantTitle, $s->page_id ); } @@ -4142,7 +4178,7 @@ class Parser // set pdbk and colour $pdbks[$key] = $varPdbk; - $colours[$varPdbk] = $sk->getLinkColour( $s, $threshold ); + $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); $linkcolour_ids[$s->page_id] = $pdbk; } wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); @@ -4314,7 +4350,7 @@ class Parser if( isset( $params['heights'] ) ) { $ig->setHeights( $params['heights'] ); } - + wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); $lines = explode( "\n", $text ); @@ -4327,6 +4363,9 @@ class Parser if ( count( $matches ) == 0 ) { continue; } + + if ( strpos( $matches[0], '%' ) !== false ) + $matches[1] = urldecode( $matches[1] ); $tp = Title::newFromText( $matches[1] ); $nt =& $tp; if( is_null( $nt ) ) { @@ -4361,9 +4400,9 @@ class Parser // Initialise static lists static $internalParamNames = array( 'horizAlign' => array( 'left', 'right', 'center', 'none' ), - 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', + 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ), - 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', + 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', 'upright', 'border' ), ); static $internalParamMap; @@ -4395,8 +4434,6 @@ class Parser * Parse image options text and use it to make an image */ function makeImage( $title, $options ) { - # @TODO: let the MediaHandler specify its transform parameters - # # Check if the options text is of the form "options|alt text" # Options are: # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang @@ -4418,13 +4455,13 @@ class Parser # * middle # * bottom # * text-bottom - + $parts = array_map( 'trim', explode( '|', $options) ); $sk = $this->mOptions->getSkin(); # Give extensions a chance to select the file revision for us - $skip = $time = false; - wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time ) ); + $skip = $time = $descQuery = false; + wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$title, &$skip, &$time, &$descQuery ) ); if ( $skip ) { return $sk->makeLinkObj( $title ); @@ -4438,25 +4475,61 @@ class Parser # Process the input parameters $caption = ''; - $params = array( 'frame' => array(), 'handler' => array(), + $params = array( 'frame' => array(), 'handler' => array(), 'horizAlign' => array(), 'vertAlign' => array() ); foreach( $parts as $part ) { list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); - if ( isset( $paramMap[$magicName] ) ) { + $validated = false; + if( isset( $paramMap[$magicName] ) ) { list( $type, $paramName ) = $paramMap[$magicName]; - $params[$type][$paramName] = $value; - + // Special case; width and height come in one variable together if( $type == 'handler' && $paramName == 'width' ) { $m = array(); - if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $value, $m ) ) { - $params[$type]['width'] = intval( $m[1] ); - $params[$type]['height'] = intval( $m[2] ); + # (bug 13500) In both cases (width/height and width only), + # permit trailing "px" for backward compatibility. + if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) { + $width = intval( $m[1] ); + $height = intval( $m[2] ); + if ( $handler->validateParam( 'width', $width ) ) { + $params[$type]['width'] = $width; + $validated = true; + } + if ( $handler->validateParam( 'height', $height ) ) { + $params[$type]['height'] = $height; + $validated = true; + } + } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) { + $width = intval( $value ); + if ( $handler->validateParam( 'width', $width ) ) { + $params[$type]['width'] = $width; + $validated = true; + } + } // else no validation -- bug 13436 + } else { + if ( $type == 'handler' ) { + # Validate handler parameter + $validated = $handler->validateParam( $paramName, $value ); } else { - $params[$type]['width'] = intval( $value ); + # Validate internal parameters + switch( $paramName ) { + case "manualthumb": + /// @fixme - possibly check validity here? + /// downstream behavior seems odd with missing manual thumbs. + $validated = true; + break; + default: + // Most other things appear to be empty or numeric... + $validated = ( $value === false || is_numeric( trim( $value ) ) ); + } + } + + if ( $validated ) { + $params[$type][$paramName] = $value; } } - } else { + } + if ( !$validated ) { $caption = $part; } } @@ -4469,15 +4542,6 @@ class Parser $params['frame']['valign'] = key( $params['vertAlign'] ); } - # Validate the handler parameters - if ( $handler ) { - foreach ( $params['handler'] as $name => $value ) { - if ( !$handler->validateParam( $name, $value ) ) { - unset( $params['handler'][$name] ); - } - } - } - # Strip bad stuff out of the alt text $alt = $this->replaceLinkHoldersText( $caption ); @@ -4490,8 +4554,10 @@ class Parser $params['frame']['alt'] = $alt; $params['frame']['caption'] = $caption; + wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); + # Linker does the rest - $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'] ); + $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time, $descQuery ); # Give the handler a chance to modify the parser object if ( $handler ) { @@ -4552,13 +4618,13 @@ class Parser * - - ... -
* * Currently the only recognised flag is "T", which means the target section number - * was derived during a template inclusion parse, in other words this is a template - * section edit link. If no flags are given, it was an ordinary section edit link. - * This flag is required to avoid a section numbering mismatch when a section is + * was derived during a template inclusion parse, in other words this is a template + * section edit link. If no flags are given, it was an ordinary section edit link. + * This flag is required to avoid a section numbering mismatch when a section is * enclosed by (bug 6563). * - * The section number 0 pulls the text before the first heading; other numbers will - * pull the given section along with its lower-level subsections. If the section is + * The section number 0 pulls the text before the first heading; other numbers will + * pull the given section along with its lower-level subsections. If the section is * not found, $mode=get will return $newtext, and $mode=replace will return $text. * * @param string $mode One of "get" or "replace" @@ -4634,11 +4700,11 @@ class Parser } $node = $node->getNextSibling(); } while ( $node ); - + // Write out the remainder (in replace mode only) if ( $mode == 'replace' ) { // Output the replacement text - // Add two newlines on -- trailing whitespace in $newText is conventionally + // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap $outText .= $newText . "\n\n"; while ( $node ) { @@ -4649,7 +4715,7 @@ class Parser if ( is_string( $outText ) ) { // Re-insert stripped tags - $outText = trim( $this->mStripState->unstripBoth( $outText ) ); + $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); } return $outText; @@ -4732,8 +4798,8 @@ class Parser } /** - * Try to guess the section anchor name based on a wikitext fragment - * presumably extracted from a heading, for example "Header" from + * Try to guess the section anchor name based on a wikitext fragment + * presumably extracted from a heading, for example "Header" from * "== Header ==". */ public function guessSectionNameFromWikiText( $text ) { @@ -4756,14 +4822,14 @@ class Parser /** * Strips a text string of wikitext for use in a section anchor - * + * * Accepts a text string and then removes all wikitext from the * string and leaves only the resultant text (i.e. the result of * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended * to create valid section anchors by mimicing the output of the * parser when headings are parsed. - * + * * @param $text string Text string to be stripped of wikitext * for use in a Section anchor * @return Filtered text string @@ -4772,15 +4838,15 @@ class Parser # Strip internal link markup $text = preg_replace('/\[\[:?([^[|]+)\|([^[]+)\]\]/','$2',$text); $text = preg_replace('/\[\[:?([^[]+)\|?\]\]/','$1',$text); - + # Strip external link markup (FIXME: Not Tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace('/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/','$2',$text); - + # Parse wikitext quotes (italics & bold) $text = $this->doQuotes($text); - + # Strip HTML tags $text = StringUtils::delimiterReplace( '<', '>', '', $text ); return $text; @@ -4832,12 +4898,12 @@ class Parser break; } else { $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); - $markerEnd = strpos( $s, $this->mMarkerSuffix, $markerStart ); + $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart ); if ( $markerEnd === false ) { $out .= substr( $s, $markerStart ); break; } else { - $markerEnd += strlen( $this->mMarkerSuffix ); + $markerEnd += strlen( self::MARKER_SUFFIX ); $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); $i = $markerEnd; } @@ -4849,7 +4915,7 @@ class Parser /** * @todo document, briefly. - * @addtogroup Parser + * @ingroup Parser */ class StripState { var $general, $nowiki; @@ -4893,12 +4959,12 @@ class StripState { /** * @todo document, briefly. - * @addtogroup Parser + * @ingroup Parser */ class OnlyIncludeReplacer { var $output = ''; - function replace( $matches ) { + function replace( $matches ) { if ( substr( $matches[1], -1 ) == "\n" ) { $this->output .= substr( $matches[1], 0, -1 ); } else { @@ -4906,4 +4972,3 @@ class OnlyIncludeReplacer { } } } -