X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FParser.php;h=a5162d6aa79659e62965c0c8266114c45b0bad23;hb=a8bbdd8d7ed77ca81cbfe37b49d744fdfc6f64d0;hp=2de8f9c7e4ba60c6646afda91ffe87a46cc03bee;hpb=a6101521a245f750d91509fea75dcf4fc0b4572f;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index 2de8f9c7e4..a5162d6aa7 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -9,21 +9,23 @@ /** - * PHP Parser - Processes wiki markup (which uses a more user-friendly + * PHP Parser - Processes wiki markup (which uses a more user-friendly * syntax, such as "[[link]]" for making links), and provides a one-way * transformation of that wiki markup it into XHTML output / markup * (which in turn the browser understands, and can display). * *
- * There are four main entry points into the Parser class:
+ * There are five main entry points into the Parser class:
  * parse()
  *   produces HTML output
  * preSaveTransform().
  *   produces altered wiki markup.
- * transformMsg()
- *   performs brace substitution on MediaWiki messages
  * preprocess()
  *   removes HTML comments and expands templates
+ * cleanSig()
+ *   Cleans a signature before saving it to preferences
+ * extractSections()
+ *   Extracts sections from an article for section editing
  *
  * Globals used:
  *    objects:   $wgLang, $wgContLang
@@ -48,7 +50,7 @@ class Parser
 	 * changes in an incompatible way, so the parser cache
 	 * can automatically discard old data.
 	 */
-	const VERSION = '1.6.2';
+	const VERSION = '1.6.4';
 
 	# Flags for Parser::setFunctionHook
 	# Also available as global constants from Defines.php
@@ -71,23 +73,36 @@ class Parser
 	const COLON_STATE_COMMENTDASH = 6;
 	const COLON_STATE_COMMENTDASHDASH = 7;
 
+	// Flags for preprocessToDom
+	const PTD_FOR_INCLUSION = 1;
+
+	// Allowed values for $this->mOutputType
+	// Parameter to startExternalParse().
+	const OT_HTML = 1;
+	const OT_WIKI = 2;
+	const OT_PREPROCESS = 3;
+	const OT_MSG = 3;
+
+	// Marker Suffix needs to be accessible statically.
+	const MARKER_SUFFIX = "-QINU\x7f";
+
 	/**#@+
 	 * @private
 	 */
 	# Persistent:
 	var $mTagHooks, $mTransparentTagHooks, $mFunctionHooks, $mFunctionSynonyms, $mVariables,
-		$mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerSuffix,
-		$mExtLinkBracketedRegex;
-	
+		$mImageParams, $mImageParamsMagicArray, $mStripList, $mMarkerIndex, $mPreprocessor,
+		$mExtLinkBracketedRegex, $mDefaultStripList, $mVarCache, $mConf;
+
+
 	# Cleared with clearState():
 	var $mOutput, $mAutonumber, $mDTopen, $mStripState;
 	var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
 	var $mInterwikiLinkHolders, $mLinkHolders;
 	var $mIncludeSizes, $mPPNodeCount, $mDefaultSort;
-	var $mTplExpandCache,// empty-frame expansion cache
-	    $mTemplatePath;	// stores an unsorted hash of all the templates already loaded
-		                // in this path. Used for loop detection.
-	var $mTplRedirCache, $mTplDomCache, $mHeadings;
+	var $mTplExpandCache; // empty-frame expansion cache
+	var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
+	var $mExpensiveFunctionCount; // number of expensive parser function calls
 
 	# Temporary
 	# These are variables reset at least once per parse regardless of $clearState
@@ -107,17 +122,24 @@ class Parser
 	 * @public
 	 */
 	function __construct( $conf = array() ) {
+		$this->mConf = $conf;
 		$this->mTagHooks = array();
 		$this->mTransparentTagHooks = array();
 		$this->mFunctionHooks = array();
 		$this->mFunctionSynonyms = array( 0 => array(), 1 => array() );
-		$this->mStripList = array( 'nowiki', 'gallery' );
-		$this->mMarkerSuffix = "-QINU\x7f";
+		$this->mDefaultStripList = $this->mStripList = array( 'nowiki', 'gallery' );
 		$this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'.
 			'[^][<>"\\x00-\\x20\\x7F]+) *([^\]\\x0a\\x0d]*?)\]/S';
+		$this->mVarCache = array();
+		if ( isset( $conf['preprocessorClass'] ) ) {
+			$this->mPreprocessorClass = $conf['preprocessorClass'];
+		} else {
+			$this->mPreprocessorClass = 'Preprocessor_DOM';
+		}
+		$this->mMarkerIndex = 0;
 		$this->mFirstCall = true;
 	}
-	
+
 	/**
 	 * Do various kinds of initialisation on the first call of the parser
 	 */
@@ -125,49 +147,15 @@ class Parser
 		if ( !$this->mFirstCall ) {
 			return;
 		}
-		
+		$this->mFirstCall = false;
+
 		wfProfileIn( __METHOD__ );
-		global $wgAllowDisplayTitle, $wgAllowSlowParserFunctions;
 
 		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );
-		
-		$this->setFunctionHook( 'int', array( 'CoreParserFunctions', 'intFunction' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'ns', array( 'CoreParserFunctions', 'ns' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'urlencode', array( 'CoreParserFunctions', 'urlencode' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'lcfirst', array( 'CoreParserFunctions', 'lcfirst' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'ucfirst', array( 'CoreParserFunctions', 'ucfirst' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'lc', array( 'CoreParserFunctions', 'lc' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'uc', array( 'CoreParserFunctions', 'uc' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'localurl', array( 'CoreParserFunctions', 'localurl' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'localurle', array( 'CoreParserFunctions', 'localurle' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'fullurl', array( 'CoreParserFunctions', 'fullurl' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'fullurle', array( 'CoreParserFunctions', 'fullurle' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'formatnum', array( 'CoreParserFunctions', 'formatnum' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'grammar', array( 'CoreParserFunctions', 'grammar' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'plural', array( 'CoreParserFunctions', 'plural' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberofpages', array( 'CoreParserFunctions', 'numberofpages' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberofusers', array( 'CoreParserFunctions', 'numberofusers' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberofarticles', array( 'CoreParserFunctions', 'numberofarticles' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberoffiles', array( 'CoreParserFunctions', 'numberoffiles' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberofadmins', array( 'CoreParserFunctions', 'numberofadmins' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'numberofedits', array( 'CoreParserFunctions', 'numberofedits' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'language', array( 'CoreParserFunctions', 'language' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'padleft', array( 'CoreParserFunctions', 'padleft' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'padright', array( 'CoreParserFunctions', 'padright' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'anchorencode', array( 'CoreParserFunctions', 'anchorencode' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'special', array( 'CoreParserFunctions', 'special' ) );
-		$this->setFunctionHook( 'defaultsort', array( 'CoreParserFunctions', 'defaultsort' ), SFH_NO_HASH );
-		$this->setFunctionHook( 'filepath', array( 'CoreParserFunctions', 'filepath' ), SFH_NO_HASH );
-
-		if ( $wgAllowDisplayTitle ) {
-			$this->setFunctionHook( 'displaytitle', array( 'CoreParserFunctions', 'displaytitle' ), SFH_NO_HASH );
-		}
-		if ( $wgAllowSlowParserFunctions ) {
-			$this->setFunctionHook( 'pagesinnamespace', array( 'CoreParserFunctions', 'pagesinnamespace' ), SFH_NO_HASH );
-		}
-
+		CoreParserFunctions::register( $this );
 		$this->initialiseVariables();
-		$this->mFirstCall = false;
+
+		wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
 		wfProfileOut( __METHOD__ );
 	}
 
@@ -209,14 +197,15 @@ class Parser
 		 * since it shouldn't match when butted up against identifier-like
 		 * string constructs.
 		 *
-		 * Must not consist of all title characters, or else it will change 
+		 * Must not consist of all title characters, or else it will change
 		 * the behaviour of <nowiki> in a link.
 		 */
 		#$this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
+		# Changed to \x7f to allow XML double-parsing -- TS
 		$this->mUniqPrefix = "\x7fUNIQ" . Parser::getRandomString();
 
+
 		# Clear these on every parse, bug 4549
- 		$this->mTemplatePath = array();
 		$this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();
 
 		$this->mShowToc = true;
@@ -228,6 +217,13 @@ class Parser
 		$this->mPPNodeCount = 0;
 		$this->mDefaultSort = false;
 		$this->mHeadings = array();
+		$this->mDoubleUnderscores = array();
+		$this->mExpensiveFunctionCount = 0;
+
+		# Fix cloning
+		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
+			$this->mPreprocessor = null;
+		}
 
 		wfRunHooks( 'ParserClearState', array( &$this ) );
 		wfProfileOut( __METHOD__ );
@@ -237,19 +233,43 @@ class Parser
 		$this->mOutputType = $ot;
 		// Shortcut alias
 		$this->ot = array(
-			'html' => $ot == OT_HTML,
-			'wiki' => $ot == OT_WIKI,
-			'msg' => $ot == OT_MSG,
-			'pre' => $ot == OT_PREPROCESS,
+			'html' => $ot == self::OT_HTML,
+			'wiki' => $ot == self::OT_WIKI,
+			'pre' => $ot == self::OT_PREPROCESS,
 		);
 	}
 
+	/**
+	 * Set the context title
+	 */
+	function setTitle( $t ) {
+		if ( !$t || $t instanceof FakeTitle ) {
+			$t = Title::newFromText( 'NO TITLE' );
+		}
+		if ( strval( $t->getFragment() ) !== '' ) {
+			# Strip the fragment to avoid various odd effects
+			$this->mTitle = clone $t;
+			$this->mTitle->setFragment( '' );
+		} else {
+			$this->mTitle = $t;
+		}
+	}
+
 	/**
 	 * Accessor for mUniqPrefix.
 	 *
 	 * @public
 	 */
 	function uniqPrefix() {
+		if( !isset( $this->mUniqPrefix ) ) {
+			// @fixme this is probably *horribly wrong*
+			// LanguageConverter seems to want $wgParser's uniqPrefix, however
+			// if this is called for a parser cache hit, the parser may not
+			// have ever been initialized in the first place.
+			// Not really sure what the heck is supposed to be going on here.
+			return '';
+			//throw new MWException( "Accessing uninitialized mUniqPrefix" );
+		}
 		return $this->mUniqPrefix;
 	}
 
@@ -281,14 +301,14 @@ class Parser
 		}
 
 		$this->mOptions = $options;
-		$this->mTitle =& $title;
+		$this->setTitle( $title );
 		$oldRevisionId = $this->mRevisionId;
 		$oldRevisionTimestamp = $this->mRevisionTimestamp;
 		if( $revid !== null ) {
 			$this->mRevisionId = $revid;
 			$this->mRevisionTimestamp = null;
 		}
-		$this->setOutputType( OT_HTML );
+		$this->setOutputType( self::OT_HTML );
 		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
 		# No more strip!
 		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
@@ -302,6 +322,7 @@ class Parser
 			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2',
 			# french spaces, Guillemet-right
 			'/(\\302\\253) /' => '\\1 ',
+			'/ (!\s*important)/' => ' \\1', #Beware of CSS magic word !important, bug #11874.
 		);
 		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 
@@ -371,18 +392,33 @@ class Parser
 				array_values( $tidyregs ),
 				$text );
 		}
+		global $wgExpensiveParserFunctionLimit;
+		if ( $this->mExpensiveFunctionCount > $wgExpensiveParserFunctionLimit ) {
+			if ( is_callable( array( $this->mOutput, 'addWarning' ) ) ) {
+				$warning = wfMsg( 'expensive-parserfunction-warning', $this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
+				$this->mOutput->addWarning( $warning );
+				$cat = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( 'expensive-parserfunction-category' ) );
+				if ( $cat ) {
+					$this->mOutput->addCategory( $cat->getDBkey(), $this->getDefaultSort() );
+				}
+			}
+		}
 
 		wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
 
 		# Information on include size limits, for the benefit of users who try to skirt them
-		if ( max( $this->mIncludeSizes ) > 1000 ) {
+		if ( $this->mOptions->getEnableLimitReport() ) {
+			global $wgExpensiveParserFunctionLimit;
 			$max = $this->mOptions->getMaxIncludeSize();
-			$text .= "\n";
+			$PFreport = "Expensive parser function count: {$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
+			$limitReport =
+				"NewPP limit report\n" .
+				"Preprocessor node count: {$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
+				"Post-expand include size: {$this->mIncludeSizes['post-expand']}/$max bytes\n" .
+				"Template argument size: {$this->mIncludeSizes['arg']}/$max bytes\n".
+				$PFreport;
+			wfRunHooks( 'ParserLimitReport', array( $this, &$limitReport ) );
+			$text .= "\n<!-- \n$limitReport-->\n";
 		}
 		$this->mOutput->setText( $text );
 		$this->mRevisionId = $oldRevisionId;
@@ -413,18 +449,15 @@ class Parser
 	function preprocess( $text, $title, $options, $revid = null ) {
 		wfProfileIn( __METHOD__ );
 		$this->clearState();
-		$this->setOutputType( OT_PREPROCESS );
+		$this->setOutputType( self::OT_PREPROCESS );
 		$this->mOptions = $options;
-		$this->mTitle = $title;
+		$this->setTitle( $title );
 		if( $revid !== null ) {
 			$this->mRevisionId = $revid;
 		}
 		wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
 		wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
 		$text = $this->replaceVariables( $text );
-		if ( $this->mOptions->getRemoveComments() ) {
-			$text = Sanitizer::removeHTMLcomments( $text );
-		}
 		$text = $this->mStripState->unstripBoth( $text );
 		wfProfileOut( __METHOD__ );
 		return $text;
@@ -445,12 +478,29 @@ class Parser
 
 	function getFunctionLang() {
 		global $wgLang, $wgContLang;
-		return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
+
+		$target = $this->mOptions->getTargetLanguage();
+		if ( $target !== null ) {
+			return $target;
+		} else {
+			return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang;
+		}
+	}
+
+	/**
+	 * Get a preprocessor object
+	 */
+	function getPreprocessor() {
+		if ( !isset( $this->mPreprocessor ) ) {
+			$class = $this->mPreprocessorClass;
+			$this->mPreprocessor = new $class( $this );
+		}
+		return $this->mPreprocessor;
 	}
 
 	/**
 	 * Replaces all occurrences of HTML-style comments and the given tags
-	 * in the text with a random marker and returns teh next text. The output
+	 * in the text with a random marker and returns the next text. The output
 	 * parameter $matches will be an associative array filled with data in
 	 * the form:
 	 *   'UNIQ-xxxxx' => array(
@@ -494,7 +544,7 @@ class Parser
 				$inside     = $p[4];
 			}
 
-			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . $this->mMarkerSuffix;
+			$marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . self::MARKER_SUFFIX;
 			$stripped .= $marker;
 
 			if ( $close === '/>' ) {
@@ -586,8 +636,8 @@ class Parser
 	 * @private
 	 */
 	function insertStripItem( $text ) {
-		static $n = 0;
-		$rnd = "{$this->mUniqPrefix}-item-$n-{$this->mMarkerSuffix}";
+		$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
+		$this->mMarkerIndex++;
 		$this->mStripState->general->setPair( $rnd, $text );
 		return $rnd;
 	}
@@ -672,7 +722,7 @@ class Parser
 
 	/**
 	 * Use the HTML tidy PECL extension to use the tidy library in-process,
-	 * saving the overhead of spawning a new process. 
+	 * saving the overhead of spawning a new process.
 	 *
 	 * 'pear install tidy' should be able to compile the extension module.
 	 *
@@ -680,7 +730,7 @@ class Parser
 	 * @static
 	 */
 	function internalTidy( $text ) {
-		global $wgTidyConf, $IP;
+		global $wgTidyConf, $IP, $wgDebugTidy;
 		$fname = 'Parser::internalTidy';
 		wfProfileIn( $fname );
 
@@ -694,6 +744,12 @@ class Parser
 		} else {
 			$cleansource = tidy_get_output( $tidy );
 		}
+		if ( $wgDebugTidy && $tidy->getStatus() > 0 ) {
+			$cleansource .= "<!--\nTidy reports:\n" .
+				str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
+				"\n-->";
+		}
+
 		wfProfileOut( $fname );
 		return $cleansource;
 	}
@@ -740,7 +796,7 @@ class Parser
 			} else if ( count ( $td_history ) == 0 ) {
 				// Don't do any of the following
 				continue;
-			} else if ( substr ( $line , 0 , 2 ) == '|}' ) { 
+			} else if ( substr ( $line , 0 , 2 ) == '|}' ) {
 				// We are ending a table
 				$line = '</table>' . substr ( $line , 2 );
 				$last_tag = array_pop ( $last_tag_history );
@@ -892,7 +948,7 @@ class Parser
 
 	/**
 	 * Helper function for parse() that transforms wiki markup into
-	 * HTML. Only called for $mOutputType == OT_HTML.
+	 * HTML. Only called for $mOutputType == self::OT_HTML.
 	 *
 	 * @private
 	 */
@@ -907,11 +963,6 @@ class Parser
 			return $text ;
 		}
 
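-		# Remove <noinclude> tags and <includeonly> sections
-		$text = strtr( $text, array( '<onlyinclude>' => '' , '</onlyinclude>' => '' ) );
-		$text = strtr( $text, array( '<noinclude>' => '', '</noinclude>' => '') );
-		$text = StringUtils::delimiterReplace( '<includeonly>', '</includeonly>', '', $text );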
-
 		$text = $this->replaceVariables( $text );
 		$text = Sanitizer::removeHTMLtags( $text, array( &$this, 'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
 		wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, &$this->mStripState ) );
@@ -924,11 +975,10 @@ class Parser
 
 		$text = preg_replace( '/(^|\n)-----*/', '\\1
', $text ); - $text = $this->stripToc( $text ); - $this->stripNoGallery( $text ); + $text = $this->doDoubleUnderscore( $text ); $text = $this->doHeadings( $text ); if($this->mOptions->getUseDynamicDates()) { - $df =& DateFormatter::getInstance(); + $df = DateFormatter::getInstance(); $text = $df->reformat( $this->mOptions->getDateFormat(), $text ); } $text = $this->doAllQuotes( $text ); @@ -980,9 +1030,9 @@ class Parser ' ' => '', 'x' => 'X', )); - $titleObj = SpecialPage::getTitleFor( 'Booksources' ); + $titleObj = SpecialPage::getTitleFor( 'Booksources', $num ); $text = 'escapeLocalUrl() . "\" class=\"internal\">ISBN $isbn"; } else { if ( substr( $m[0], 0, 3 ) == 'RFC' ) { @@ -1470,6 +1520,8 @@ class Parser } if( is_null( $this->mTitle ) ) { + wfProfileOut( $fname ); + wfProfileOut( $fname.'-setup' ); throw new MWException( __METHOD__.": \$this->mTitle is null\n" ); } $nottalk = !$this->mTitle->isTalkPage(); @@ -1559,6 +1611,7 @@ class Parser # should be external links. if (preg_match('/^\b(?:' . wfUrlProtocols() . ')/', $m[1])) { $s .= $prefix . '[[' . $line ; + wfProfileOut( "$fname-misc" ); continue; } @@ -1705,7 +1758,14 @@ class Parser # Special and Media are pseudo-namespaces; no pages actually exist in them if( $ns == NS_MEDIA ) { - $link = $sk->makeMediaLinkObj( $nt, $text ); + # Give extensions a chance to select the file revision for us + $skip = $time = false; + wfRunHooks( 'BeforeParserMakeImageLinkObj', array( &$this, &$nt, &$skip, &$time ) ); + if ( $skip ) { + $link = $sk->makeLinkObj( $nt ); + } else { + $link = $sk->makeMediaLinkObj( $nt, $text, $time ); + } # Cloak with NOPARSE to avoid replacement in replaceExternalLinks $s .= $prefix . $this->armorLinks( $link ) . $trail; $this->mOutput->addImage( $nt->getDBkey() ); @@ -2316,14 +2376,13 @@ class Parser * Some of these require message or data lookups and can be * expensive to check many times. 
*/ - static $varCache = array(); - if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$varCache ) ) ) { - if ( isset( $varCache[$index] ) ) { - return $varCache[$index]; + if ( wfRunHooks( 'ParserGetVariableValueVarCache', array( &$this, &$this->mVarCache ) ) ) { + if ( isset( $this->mVarCache[$index] ) ) { + return $this->mVarCache[$index]; } } - $ts = time(); + $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() ); wfRunHooks( 'ParserGetVariableValueTs', array( &$this, &$ts ) ); # Use the time zone @@ -2332,7 +2391,7 @@ class Parser $oldtz = getenv( 'TZ' ); putenv( 'TZ='.$wgLocaltimezone ); } - + wfSuppressWarnings(); // E_STRICT system time bitching $localTimestamp = date( 'YmdHis', $ts ); $localMonth = date( 'm', $ts ); @@ -2350,29 +2409,29 @@ class Parser switch ( $index ) { case 'currentmonth': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'm', $ts ) ); case 'currentmonthname': - return $varCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); case 'currentmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); case 'currentmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); case 'currentday': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'j', $ts ) ); case 'currentday2': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'd', $ts ) ); case 'localmonth': - return $varCache[$index] = 
$wgContLang->formatNum( $localMonth ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localMonth ); case 'localmonthname': - return $varCache[$index] = $wgContLang->getMonthName( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthName( $localMonthName ); case 'localmonthnamegen': - return $varCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthNameGen( $localMonthName ); case 'localmonthabbrev': - return $varCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); + return $this->mVarCache[$index] = $wgContLang->getMonthAbbreviation( $localMonthName ); case 'localday': - return $varCache[$index] = $wgContLang->formatNum( $localDay ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay ); case 'localday2': - return $varCache[$index] = $wgContLang->formatNum( $localDay2 ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDay2 ); case 'pagename': return wfEscapeWikiText( $this->mTitle->getText() ); case 'pagenamee': @@ -2416,14 +2475,34 @@ class Parser wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision...\n" ); return $this->mRevisionId; case 'revisionday': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" ); return intval( substr( $this->getRevisionTimestamp(), 6, 2 ) ); case 'revisionday2': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . 
": {{REVISIONDAY2}} used, setting vary-revision...\n" ); return substr( $this->getRevisionTimestamp(), 6, 2 ); case 'revisionmonth': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" ); return intval( substr( $this->getRevisionTimestamp(), 4, 2 ) ); case 'revisionyear': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" ); return substr( $this->getRevisionTimestamp(), 0, 4 ); case 'revisiontimestamp': + // Let the edit saving system know we should parse the page + // *after* a revision ID has been assigned. This is for null edits. + $this->mOutput->setFlag( 'vary-revision' ); + wfDebug( __METHOD__ . 
": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" ); return $this->getRevisionTimestamp(); case 'namespace': return str_replace('_',' ',$wgContLang->getNsText( $this->mTitle->getNamespace() ) ); @@ -2438,51 +2517,51 @@ class Parser case 'subjectspacee': return( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); case 'currentdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); + return $this->mVarCache[$index] = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); case 'currentyear': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); case 'currenttime': - return $varCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); + return $this->mVarCache[$index] = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); case 'currenthour': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); case 'currentweek': // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); case 'currentdow': - return $varCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); + return $this->mVarCache[$index] = $wgContLang->formatNum( gmdate( 'w', $ts ) ); case 'localdayname': - return $varCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); + return $this->mVarCache[$index] = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); case 'localyear': - return $varCache[$index] = $wgContLang->formatNum( $localYear, true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localYear, true ); case 'localtime': - return $varCache[$index] = $wgContLang->time( 
$localTimestamp, false, false ); + return $this->mVarCache[$index] = $wgContLang->time( $localTimestamp, false, false ); case 'localhour': - return $varCache[$index] = $wgContLang->formatNum( $localHour, true ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localHour, true ); case 'localweek': // @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to // int to remove the padding - return $varCache[$index] = $wgContLang->formatNum( (int)$localWeek ); + return $this->mVarCache[$index] = $wgContLang->formatNum( (int)$localWeek ); case 'localdow': - return $varCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); + return $this->mVarCache[$index] = $wgContLang->formatNum( $localDayOfWeek ); case 'numberofarticles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::articles() ); case 'numberoffiles': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::images() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::images() ); case 'numberofusers': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::users() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::users() ); case 'numberofpages': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::pages() ); case 'numberofadmins': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::admins() ); case 'numberofedits': - return $varCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); + return $this->mVarCache[$index] = $wgContLang->formatNum( SiteStats::edits() ); case 'currenttimestamp': - return $varCache[$index] = wfTimestampNow(); + return $this->mVarCache[$index] = wfTimestamp( TS_MW, $ts ); case 'localtimestamp': - return $varCache[$index] = 
$localTimestamp; + return $this->mVarCache[$index] = $localTimestamp; case 'currentversion': - return $varCache[$index] = SpecialVersion::getVersion(); + return $this->mVarCache[$index] = SpecialVersion::getVersion(); case 'sitename': return $wgSitename; case 'server': @@ -2498,7 +2577,7 @@ class Parser return $wgContLanguageCode; default: $ret = null; - if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$varCache, &$index, &$ret ) ) ) + if ( wfRunHooks( 'ParserGetVariableValueSwitch', array( &$this, &$this->mVarCache, &$index, &$ret ) ) ) return $ret; else return null; @@ -2520,459 +2599,33 @@ class Parser } /** - * Parse any parentheses in format ((title|part|part)} and return the document tree - * This is the ghost of replace_variables(). + * Preprocess some wikitext and return the document tree. + * This is the ghost of replace_variables(). * * @param string $text The text to parse + * @param integer flags Bitwise combination of: + * self::PTD_FOR_INCLUSION Handle / as if the text is being + * included. Default is to assume a direct page view. + * + * The generated DOM tree must depend only on the input text and the flags. + * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899. + * + * Any flag added to the $flags parameter here, or any other parameter liable to cause a + * change in the DOM tree for a given text, must be passed through the section identifier + * in the section edit link and thus back to extractSections(). + * + * The output of this function is currently only cached in process memory, but a persistent + * cache may be implemented at a later date which takes further advantage of these strict + * dependency requirements. 
+ * * @private */ - function preprocessToDom ( $text ) { - wfProfileIn( __METHOD__ ); - wfProfileIn( __METHOD__.'-makexml' ); - - static $msgRules, $normalRules; - if ( !$msgRules ) { - $msgRules = array( - '{' => array( - 'end' => '}', - 'names' => array( - 2 => 'template', - ), - 'min' => 2, - 'max' => 2, - ), - '[' => array( - 'end' => ']', - 'names' => array( 2 => null ), - 'min' => 2, - 'max' => 2, - ) - ); - $normalRules = array( - '{' => array( - 'end' => '}', - 'names' => array( - 2 => 'template', - 3 => 'tplarg', - ), - 'min' => 2, - 'max' => 3, - ), - '[' => array( - 'end' => ']', - 'names' => array( 2 => null ), - 'min' => 2, - 'max' => 2, - ) - ); - } - if ( $this->ot['msg'] ) { - $rules = $msgRules; - } else { - $rules = $normalRules; - } - - $extElements = implode( '|', $this->getStripList() ); - // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset - $extElementsRegex = "/($extElements)(?:\s|\/>|>)|(!--)/iA"; - - $stack = array(); # Stack of unclosed parentheses - $stackIndex = -1; # Stack read pointer - - $searchBase = implode( '', array_keys( $rules ) ) . 
'<'; - - $i = -1; # Input pointer, starts out pointing to a pseudo-newline before the start - $topAccum = ''; # Top level text accumulator - $accum =& $topAccum; # Current text accumulator - $findEquals = false; # True to find equals signs in arguments - $findHeading = false; # True to look at LF characters for possible headings - $findPipe = false; # True to take notice of pipe characters - $headingIndex = 1; - $noMoreGT = false; # True if there are no more greater-than (>) signs right of $i - - while ( $i < strlen( $text ) ) { - if ( $i == -1 ) { - $found = 'line-start'; - $curChar = ''; - } else { - # Find next opening brace, closing brace or pipe - $search = $searchBase; - if ( $stackIndex == -1 ) { - $currentClosing = ''; - // Look for headings only at the top stack level - // Among other things, this resolves the ambiguity between = - // for headings and = for template arguments - $search .= "\n"; - } else { - $currentClosing = $stack[$stackIndex]['close']; - $search .= $currentClosing; - } - if ( $findPipe ) { - $search .= '|'; - } - if ( $findEquals ) { - $search .= '='; - } - $rule = null; - # Output literal section, advance input counter - $literalLength = strcspn( $text, $search, $i ); - if ( $literalLength > 0 ) { - $accum .= htmlspecialchars( substr( $text, $i, $literalLength ) ); - $i += $literalLength; - } - if ( $i >= strlen( $text ) ) { - if ( $currentClosing == "\n" ) { - // Do a past-the-end run to finish off the heading - $curChar = ''; - $found = 'line-end'; - } else { - # All done - break; - } - } else { - $curChar = $text[$i]; - if ( $curChar == '|' ) { - $found = 'pipe'; - } elseif ( $curChar == '=' ) { - $found = 'equals'; - } elseif ( $curChar == '<' ) { - $found = 'angle'; - } elseif ( $curChar == "\n" ) { - if ( $stackIndex == -1 ) { - $found = 'line-start'; - } else { - $found = 'line-end'; - } - } elseif ( $curChar == $currentClosing ) { - $found = 'close'; - } elseif ( isset( $rules[$curChar] ) ) { - $found = 'open'; - $rule = 
$rules[$curChar]; - } else { - # Some versions of PHP have a strcspn which stops on null characters - # Ignore and continue - ++$i; - continue; - } - } - } - - if ( $found == 'angle' ) { - $matches = false; - // Determine element name - if ( !preg_match( $extElementsRegex, $text, $matches, 0, $i + 1 ) ) { - // Element name missing or not listed - $accum .= '<'; - ++$i; - continue; - } - // Handle comments - if ( isset( $matches[2] ) && $matches[2] == '!--' ) { - // HTML comment, scan to end - $endpos = strpos( $text, '-->', $i + 4 ); - if ( $endpos === false ) { - // Unclosed comment in input, runs to end - $accum .= htmlspecialchars( substr( $text, $i ) ); - if ( $this->ot['html'] ) { - // Close it so later stripping can remove it - $accum .= htmlspecialchars( '-->' ); - } - $i = strlen( $text ); - continue; - } - $accum .= htmlspecialchars( substr( $text, $i, $endpos - $i + 3 ) ); - #$inner = substr( $text, $i + 4, $endpos - $i - 4 ); - #$accum .= '!--' . htmlspecialchars( $inner ) . ''; - $i = $endpos + 3; - continue; - } - $name = $matches[1]; - $attrStart = $i + strlen( $name ) + 1; - - // Find end of tag - $tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart ); - if ( $tagEndPos === false ) { - // Infinite backtrack - // Disable tag search to prevent worst-case O(N^2) performance - $noMoreGT = true; - $accum .= '<'; - ++$i; - continue; - } - if ( $text[$tagEndPos-1] == '/' ) { - $attrEnd = $tagEndPos - 1; - $inner = null; - $i = $tagEndPos + 1; - $close = ''; - } else { - $attrEnd = $tagEndPos; - // Find closing tag - if ( preg_match( "/<\/$name\s*>/i", $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) ) { - $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 ); - $i = $matches[0][1] + strlen( $matches[0][0] ); - $close = '' . htmlspecialchars( $matches[0][0] ) . ''; - } else { - // No end tag -- let it run out to the end of the text. 
- $inner = substr( $text, $tagEndPos + 1 ); - $i = strlen( $text ); - $close = ''; - } - } - $accum .= ''; - if ( $attrEnd <= $attrStart ) { - $attr = ''; - } else { - $attr = substr( $text, $attrStart, $attrEnd - $attrStart ); - } - $accum .= '' . htmlspecialchars( $name ) . '' . - // Note that the attr element contains the whitespace between name and attribute, - // this is necessary for precise reconstruction during pre-save transform. - '' . htmlspecialchars( $attr ) . ''; - if ( $inner !== null ) { - $accum .= '' . htmlspecialchars( $inner ) . ''; - } - $accum .= $close . ''; - } - - elseif ( $found == 'line-start' ) { - // Is this the start of a heading? - // Line break belongs before the heading element in any case - $accum .= $curChar; - $i++; - - $count = strspn( $text, '=', $i, 6 ); - if ( $count > 0 ) { - $piece = array( - 'open' => "\n", - 'close' => "\n", - 'parts' => array( str_repeat( '=', $count ) ), - 'count' => $count ); - $stack[++$stackIndex] = $piece; - $i += $count; - $accum =& $stack[$stackIndex]['parts'][0]; - $findPipe = false; - } - } - - elseif ( $found == 'line-end' ) { - $piece = $stack[$stackIndex]; - // A heading must be open, otherwise \n wouldn't have been in the search list - assert( $piece['open'] == "\n" ); - assert( $stackIndex == 0 ); - // Search back through the accumulator to see if it has a proper close - // No efficient way to do this in PHP AFAICT: strrev, PCRE search with $ anchor - // and rtrim are all O(N) in total size. Optimal would be O(N) in trailing - // whitespace size only. 
- $m = false; - $count = $piece['count']; - if ( preg_match( "/(={{$count}})\s*$/", $accum, $m, 0, $count ) ) { - // Found match, output - $count = min( strlen( $m[1] ), $count ); - $element = "$accum"; - $headingIndex++; - } else { - // No match, no , just pass down the inner text - $element = $accum; - } - // Unwind the stack - // Headings can only occur on the top level, so this is a bit simpler than the - // generic stack unwind operation in the close case - unset( $stack[$stackIndex--] ); - $accum =& $topAccum; - $findEquals = false; - $findPipe = false; - - // Append the result to the enclosing accumulator - $accum .= $element; - // Note that we do NOT increment the input pointer. - // This is because the closing linebreak could be the opening linebreak of - // another heading. Infinite loops are avoided because the next iteration MUST - // hit the heading open case above, which unconditionally increments the - // input pointer. - } - - elseif ( $found == 'open' ) { - # count opening brace characters - $count = strspn( $text, $curChar, $i ); - - # we need to add to stack only if opening brace count is enough for one of the rules - if ( $count >= $rule['min'] ) { - # Add it to the stack - $piece = array( - 'open' => $curChar, - 'close' => $rule['end'], - 'count' => $count, - 'parts' => array( '' ), - 'eqpos' => array(), - 'lineStart' => ($i > 0 && $text[$i-1] == "\n"), - ); - - $stackIndex ++; - $stack[$stackIndex] = $piece; - $accum =& $stack[$stackIndex]['parts'][0]; - $findEquals = false; - $findPipe = true; - } else { - # Add literal brace(s) - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); - } - $i += $count; - } - - elseif ( $found == 'close' ) { - $piece = $stack[$stackIndex]; - # lets check if there are enough characters for closing brace - $maxCount = $piece['count']; - $count = strspn( $text, $curChar, $i, $maxCount ); - - # check for maximum matching characters (if there are 5 closing - # characters, we will probably need only 3 - 
depending on the rules) - $matchingCount = 0; - $rule = $rules[$piece['open']]; - if ( $count > $rule['max'] ) { - # The specified maximum exists in the callback array, unless the caller - # has made an error - $matchingCount = $rule['max']; - } else { - # Count is less than the maximum - # Skip any gaps in the callback array to find the true largest match - # Need to use array_key_exists not isset because the callback can be null - $matchingCount = $count; - while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) { - --$matchingCount; - } - } - - if ($matchingCount <= 0) { - # No matching element found in callback array - # Output a literal closing brace and continue - $accum .= htmlspecialchars( str_repeat( $curChar, $count ) ); - $i += $count; - continue; - } - $name = $rule['names'][$matchingCount]; - if ( $name === null ) { - // No element, just literal text - $element = str_repeat( $piece['open'], $matchingCount ) . - implode( '|', $piece['parts'] ) . - str_repeat( $rule['end'], $matchingCount ); - } else { - # Create XML element - # Note: $parts is already XML, does not need to be encoded further - $parts = $piece['parts']; - $title = $parts[0]; - unset( $parts[0] ); - - # The invocation is at the start of the line if lineStart is set in - # the stack, and all opening brackets are used up. 
- if ( $maxCount == $matchingCount && !empty( $piece['lineStart'] ) ) { - $attr = ' lineStart="1"'; - } else { - $attr = ''; - } - - $element = "<$name$attr>"; - $element .= "$title"; - $argIndex = 1; - foreach ( $parts as $partIndex => $part ) { - if ( isset( $piece['eqpos'][$partIndex] ) ) { - $eqpos = $piece['eqpos'][$partIndex]; - list( $ws1, $argName, $ws2 ) = self::splitWhitespace( substr( $part, 0, $eqpos ) ); - list( $ws3, $argValue, $ws4 ) = self::splitWhitespace( substr( $part, $eqpos + 1 ) ); - $element .= "$ws1$argName$ws2=$ws3$argValue$ws4"; - } else { - list( $ws1, $value, $ws2 ) = self::splitWhitespace( $part ); - $element .= "$ws1$value$ws2"; - $argIndex++; - } - } - $element .= ""; - } - - # Advance input pointer - $i += $matchingCount; - - # Unwind the stack - unset( $stack[$stackIndex--] ); - if ( $stackIndex == -1 ) { - $accum =& $topAccum; - $findEquals = false; - $findPipe = false; - } else { - $partCount = count( $stack[$stackIndex]['parts'] ); - $accum =& $stack[$stackIndex]['parts'][$partCount - 1]; - $findPipe = $stack[$stackIndex]['open'] != "\n"; - $findEquals = $findPipe && $partCount > 1 - && !isset( $stack[$stackIndex]['eqpos'][$partCount - 1] ); - } - - # Re-add the old stack element if it still has unmatched opening characters remaining - if ($matchingCount < $piece['count']) { - $piece['parts'] = array( '' ); - $piece['count'] -= $matchingCount; - $piece['eqpos'] = array(); - # do we still qualify for any callback with remaining count? 
- $names = $rules[$piece['open']]['names']; - $skippedBraces = 0; - $enclosingAccum =& $accum; - while ( $piece['count'] ) { - if ( array_key_exists( $piece['count'], $names ) ) { - $stackIndex++; - $stack[$stackIndex] = $piece; - $accum =& $stack[$stackIndex]['parts'][0]; - $findEquals = true; - $findPipe = true; - break; - } - --$piece['count']; - $skippedBraces ++; - } - $enclosingAccum .= str_repeat( $piece['open'], $skippedBraces ); - } - - # Add XML element to the enclosing accumulator - $accum .= $element; - } - - elseif ( $found == 'pipe' ) { - $stack[$stackIndex]['parts'][] = ''; - $partsCount = count( $stack[$stackIndex]['parts'] ); - $accum =& $stack[$stackIndex]['parts'][$partsCount - 1]; - $findEquals = true; - ++$i; - } - - elseif ( $found == 'equals' ) { - $findEquals = false; - $partsCount = count( $stack[$stackIndex]['parts'] ); - $stack[$stackIndex]['eqpos'][$partsCount - 1] = strlen( $accum ); - $accum .= '='; - ++$i; - } - } - - # Output any remaining unclosed brackets - foreach ( $stack as $piece ) { - if ( $piece['open'] == "\n" ) { - $topAccum .= $piece['parts'][0]; - } else { - $topAccum .= str_repeat( $piece['open'], $piece['count'] ) . implode( '|', $piece['parts'] ); - } - } - $topAccum .= ''; - - wfProfileOut( __METHOD__.'-makexml' ); - wfProfileIn( __METHOD__.'-loadXML' ); - $dom = new DOMDocument; - if ( !$dom->loadXML( $topAccum ) ) { - throw new MWException( __METHOD__.' generated invalid XML' ); - } - wfProfileOut( __METHOD__.'-loadXML' ); - wfProfileOut( __METHOD__ ); + function preprocessToDom ( $text, $flags = 0 ) { + $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags ); return $dom; } - /* + /* * Return a three-element array: leading whitespace, string contents, trailing whitespace */ public static function splitWhitespace( $s ) { @@ -2994,9 +2647,9 @@ class Parser * taking care to avoid infinite loops. 
* * Note that the substitution depends on value of $mOutputType: - * OT_WIKI: only {{subst:}} templates - * OT_MSG: only magic variables - * OT_HTML: all templates and magic variables + * self::OT_WIKI: only {{subst:}} templates + * self::OT_PREPROCESS: templates but not extension tags + * self::OT_HTML: all templates and extension tags * * @param string $tex The text to transform * @param PPFrame $frame Object describing the arguments passed to the template @@ -3013,14 +2666,14 @@ class Parser wfProfileIn( $fname ); if ( $frame === false ) { - $frame = new PPFrame( $this ); + $frame = $this->getPreprocessor()->newFrame(); } elseif ( !( $frame instanceof PPFrame ) ) { throw new MWException( __METHOD__ . ' called using the old argument format' ); } $dom = $this->preprocessToDom( $text ); $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0; - $text = $frame->expand( $dom, 0, $flags ); + $text = $frame->expand( $dom, $flags ); wfProfileOut( $fname ); return $text; @@ -3054,9 +2707,9 @@ class Parser * replacing any variables or templates within the template. * * @param array $piece The parts of the template - * $piece['text']: matched text * $piece['title']: the title, i.e. the part before the | * $piece['parts']: the parameter array + * $piece['lineStart']: whether the brace was at the start of a line * @param PPFrame The current frame, contains template arguments * @return string the text of the template * @private @@ -3070,17 +2723,16 @@ class Parser # Flags $found = false; # $text has been filled $nowiki = false; # wiki markup in $text should be escaped - $noparse = false; # Unsafe HTML tags should not be stripped, etc. 
- $noargs = false; # Don't replace triple-brace arguments in $text $isHTML = false; # $text is HTML, armour it against wikitext transformation $forceRawInterwiki = false; # Force interwiki transclusion to be done in raw mode not rendered - $isDOM = false; # $text is a DOM node needing expansion + $isChildObj = false; # $text is a DOM node needing expansion in a child frame + $isLocalObj = false; # $text is a DOM node needing expansion in the current frame # Title object, where $text came from $title = NULL; - # $part1 is the bit before the first |, and must contain only title characters. - # Various prefixes will be stripped from it later. + # $part1 is the bit before the first |, and must contain only title characters. + # Various prefixes will be stripped from it later. $titleWithSpaces = $frame->expand( $piece['title'] ); $part1 = trim( $titleWithSpaces ); $titleText = false; @@ -3095,54 +2747,49 @@ class Parser # SUBST wfProfileIn( __METHOD__.'-modifiers' ); if ( !$found ) { - $mwSubst =& MagicWord::get( 'subst' ); + $mwSubst = MagicWord::get( 'subst' ); if ( $mwSubst->matchStartAndRemove( $part1 ) xor $this->ot['wiki'] ) { # One of two possibilities is true: # 1) Found SUBST but not in the PST phase # 2) Didn't find SUBST and in the PST phase # In either case, return without further processing - $text = '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . 
'}}'; + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + $isLocalObj = true; $found = true; - $noparse = true; - $noargs = true; } } # Variables - if ( !$found && $args->length == 0 ) { + if ( !$found && $args->getLength() == 0 ) { $id = $this->mVariables->matchStartToEnd( $part1 ); if ( $id !== false ) { $text = $this->getVariableValue( $id ); - $this->mOutput->mContainsOldMagic = true; + if (MagicWord::getCacheTTL($id)>-1) + $this->mOutput->mContainsOldMagic = true; $found = true; - $noparse = true; - $noargs = true; } } # MSG, MSGNW and RAW if ( !$found ) { # Check for MSGNW: - $mwMsgnw =& MagicWord::get( 'msgnw' ); + $mwMsgnw = MagicWord::get( 'msgnw' ); if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: - $mwMsg =& MagicWord::get( 'msg' ); + $mwMsg = MagicWord::get( 'msg' ); $mwMsg->matchStartAndRemove( $part1 ); } # Check for RAW: - $mwRaw =& MagicWord::get( 'raw' ); + $mwRaw = MagicWord::get( 'raw' ); if ( $mwRaw->matchStartAndRemove( $part1 ) ) { $forceRawInterwiki = true; } } wfProfileOut( __METHOD__.'-modifiers' ); - # Save path level before recursing into functions & templates. - $lastPathLevel = $this->mTemplatePath; - # Parser functions if ( !$found ) { wfProfileIn( __METHOD__ . '-pfunc' ); @@ -3170,32 +2817,25 @@ class Parser # Add a frame parameter, and pass the arguments as an array $allArgs = $initialArgs; $allArgs[] = $frame; - foreach ( $args as $arg ) { - $funcArgs[] = $arg; + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $funcArgs[] = $args->item( $i ); } $allArgs[] = $funcArgs; } else { # Convert arguments to plain text - foreach ( $args as $arg ) { - $funcArgs[] = trim( $frame->expand( $arg ) ); + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $funcArgs[] = trim( $frame->expand( $args->item( $i ) ) ); } $allArgs = array_merge( $initialArgs, $funcArgs ); } - if (! 
is_callable($callback)) { - if (is_array($callback)) - $callback = $callback[0]; - else - die ("\nInvalid callback for $function: '$callback' ($flags)\n"); + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $callback ) ) { + throw new MWException( "Tag hook for $name is not callable\n" ); } - $result = call_user_func_array( $callback, $allArgs ); $found = true; - // The text is usually already parsed, doesn't need triple-brace tags expanded, etc. - $noargs = true; - $noparse = true; - if ( is_array( $result ) ) { if ( isset( $result[0] ) ) { $text = $result[0]; @@ -3203,7 +2843,7 @@ class Parser } // Extract flags into the local scope - // This allows callers to set flags such as nowiki, noparse, found, etc. + // This allows callers to set flags such as nowiki, found, etc. extract( $result ); } else { $text = $result; @@ -3231,13 +2871,17 @@ class Parser $wgContLang->findVariantLink($part1, $title); } # Do infinite loop check - if ( isset( $this->mTemplatePath[$titleText] ) ) { - $noparse = true; - $noargs = true; + if ( !$frame->loopCheck( $title ) ) { $found = true; - $text = "[[$part1]]" . 
$this->insertStripItem( '' ); + $text = "Template loop detected: [[$titleText]]"; wfDebug( __METHOD__.": template loop broken at '$titleText'\n" ); } + # Do recursion depth check + $limit = $this->mOptions->getMaxTemplateDepth(); + if ( $frame->depth >= $limit ) { + $found = true; + $text = "Template recursion depth limit exceeded ($limit)"; + } } } @@ -3249,8 +2893,6 @@ class Parser $text = SpecialPage::capturePath( $title ); if ( is_string( $text ) ) { $found = true; - $noparse = true; - $noargs = true; $isHTML = true; $this->disableCache(); } @@ -3261,7 +2903,7 @@ class Parser list( $text, $title ) = $this->getTemplateDom( $title ); if ( $text !== false ) { $found = true; - $isDOM = true; + $isChildObj = true; } } @@ -3275,108 +2917,81 @@ class Parser if ( $this->ot['html'] && !$forceRawInterwiki ) { $text = $this->interwikiTransclude( $title, 'render' ); $isHTML = true; - $noparse = true; } else { $text = $this->interwikiTransclude( $title, 'raw' ); + // Preprocess it like a template + $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); + $isChildObj = true; } $found = true; } wfProfileOut( __METHOD__ . 
'-loadtpl' ); } - # Recursive parsing, escaping and link table handling - # Only for HTML output - if ( $nowiki && $found && ( $this->ot['html'] || $this->ot['pre'] ) ) { - if ( $isDOM ) { - $text = $frame->expand( $text ); - } - $text = wfEscapeWikiText( $text ); - } elseif ( !$this->ot['msg'] && $found ) { - if ( $noargs ) { - $newFrame = $frame->newChild(); - } else { - # Clean up argument array - $newFrame = $frame->newChild( $args, $title ); - # Add a new element to the templace recursion path - $this->mTemplatePath[$titleText] = 1; - } - - if ( !$noparse ) { - if ( $isDOM ) { - if ( $titleText !== false && count( $newFrame->args ) == 0 ) { - # Expansion is eligible for the empty-frame cache - if ( isset( $this->mTplExpandCache[$titleText] ) ) { - $text = $this->mTplExpandCache[$titleText]; - } else { - $text = $newFrame->expand( $text ); - $this->mTplExpandCache[$titleText] = $text; - } - } else { - $text = $newFrame->expand( $text ); - } - } else { - $text = $this->replaceVariables( $text, $newFrame ); - } + # If we haven't found text to substitute by now, we're done + # Recover the source wikitext and return it + if ( !$found ) { + $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + wfProfileOut( $fname ); + return array( 'object' => $text ); + } - # strip woz 'ere 2004-07 + # Expand DOM-style return values in a child frame + if ( $isChildObj ) { + # Clean up argument array + $newFrame = $frame->newChild( $args, $title ); - # Bug 529: if the template begins with a table or block-level - # element, it should be treated as beginning a new line. - # This behaviour is somewhat controversial. - if (!$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ - $text = "\n" . 
$text; - } - } elseif ( !$noargs ) { - # $noparse and !$noargs - # Just replace the arguments, not any double-brace items - # This is used for rendered interwiki transclusion - if ( $isDOM ) { - $text = $newFrame->expand( $text, 0, PPFrame::NO_TEMPLATES ); + if ( $nowiki ) { + $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); + } elseif ( $titleText !== false && $newFrame->isEmpty() ) { + # Expansion is eligible for the empty-frame cache + if ( isset( $this->mTplExpandCache[$titleText] ) ) { + $text = $this->mTplExpandCache[$titleText]; } else { - $text = $this->replaceVariables( $text, $newFrame, true ); + $text = $newFrame->expand( $text ); + $this->mTplExpandCache[$titleText] = $text; } - } elseif ( $isDOM ) { - $text = $frame->expand( $text ); + } else { + # Uncached expansion + $text = $newFrame->expand( $text ); } - } elseif ( $isDOM ) { - $text = $frame->expand( $text, 0, PPFrame::NO_TEMPLATES | PPFrame::NO_ARGS ); + } + if ( $isLocalObj && $nowiki ) { + $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); + $isLocalObj = false; } - # Prune lower levels off the recursion check path - $this->mTemplatePath = $lastPathLevel; + # Replace raw HTML by a placeholder + # Add a blank line preceding, to prevent it from mucking up + # immediately preceding headings + if ( $isHTML ) { + $text = "\n\n" . $this->insertStripItem( $text ); + } + # Escape nowiki-style return values + elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) { + $text = wfEscapeWikiText( $text ); + } + # Bug 529: if the template begins with a table or block-level + # element, it should be treated as beginning a new line. + # This behaviour is somewhat controversial. + elseif ( is_string( $text ) && !$piece['lineStart'] && preg_match('/^(?:{\\||:|;|#|\*)/', $text)) /*}*/{ + $text = "\n" . 
$text; + } - if ( $found && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { + if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { # Error, oversize inclusion - $text = "[[$originalTitle]]" . + $text = "[[$originalTitle]]" . $this->insertStripItem( '' ); - $noparse = true; - $noargs = true; } - if ( !$found ) { - wfProfileOut( $fname ); - return '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; + if ( $isLocalObj ) { + $ret = array( 'object' => $text ); } else { - wfProfileIn( __METHOD__ . '-placeholders' ); - if ( $isHTML ) { - # Replace raw HTML by a placeholder - # Add a blank line preceding, to prevent it from mucking up - # immediately preceding headings - $text = "\n\n" . $this->insertStripItem( $text ); - } - wfProfileOut( __METHOD__ . '-placeholders' ); + $ret = array( 'text' => $text ); } - # Prune lower levels off the recursion check path - $this->mTemplatePath = $lastPathLevel; - - if ( !$found ) { - wfProfileOut( $fname ); - return '{{' . $frame->implode( '|', $titleWithSpaces, $args ) . '}}'; - } else { - wfProfileOut( $fname ); - return $text; - } + wfProfileOut( $fname ); + return $ret; } /** @@ -3384,8 +2999,9 @@ class Parser * and its redirect destination title. Cached. 
*/ function getTemplateDom( $title ) { + $cacheTitle = $title; $titleText = $title->getPrefixedDBkey(); - + if ( isset( $this->mTplRedirCache[$titleText] ) ) { list( $ns, $dbk ) = $this->mTplRedirCache[$titleText]; $title = Title::makeTitle( $ns, $dbk ); @@ -3403,21 +3019,14 @@ class Parser return array( false, $title ); } - # If there are any tags, only include them - if ( !$this->ot['msg'] ) { - if ( in_string( '', $text ) && in_string( '', $text ) ) { - $replacer = new OnlyIncludeReplacer; - StringUtils::delimiterReplaceCallback( '', '', - array( &$replacer, 'replace' ), $text ); - $text = $replacer->output; - } - # Remove sections and tags - $text = StringUtils::delimiterReplace( '', '', '', $text ); - $text = strtr( $text, array( '' => '' , '' => '' ) ); + $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); + $this->mTplDomCache[ $titleText ] = $dom; + + if (! $title->equals($cacheTitle)) { + $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] = + array( $title->getNamespace(),$cdb = $title->getDBkey() ); } - $dom = $this->preprocessToDom( $text ); - $this->mTplDomCache[$titleText] = $dom; return array( $dom, $title ); } @@ -3450,13 +3059,13 @@ class Parser $text = $skip = false; $finalTitle = $title; $deps = array(); - + // Loop to fetch the article, with up to 1 redirect for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) { # Give extensions a chance to select the revision instead $id = false; // Assume current wfRunHooks( 'BeforeParserFetchTemplateAndtitle', array( false, &$title, &$skip, &$id ) ); - + if( $skip ) { $text = false; $deps[] = array( @@ -3468,9 +3077,9 @@ class Parser $rev = $id ? Revision::newFromId( $id ) : Revision::newFromTitle( $title ); $rev_id = $rev ? 
$rev->getId() : 0; - $deps[] = array( - 'title' => $title, - 'page_id' => $title->getArticleID(), + $deps[] = array( + 'title' => $title, + 'page_id' => $title->getArticleID(), 'rev_id' => $rev_id ); if( $rev ) { @@ -3548,30 +3157,41 @@ class Parser function argSubstitution( $piece, $frame ) { wfProfileIn( __METHOD__ ); - $text = false; $error = false; $parts = $piece['parts']; - $argWithSpaces = $frame->expand( $piece['title'] ); - $arg = trim( $argWithSpaces ); - - if ( isset( $frame->args[$arg] ) ) { - $text = $frame->parent->expand( $frame->args[$arg] ); - } else if ( ( $this->ot['html'] || $this->ot['pre'] ) && $parts->length > 0 ) { - $text = $frame->expand( $parts->item( 0 ) ); + $nameWithSpaces = $frame->expand( $piece['title'] ); + $argName = trim( $nameWithSpaces ); + $object = false; + $text = $frame->getArgument( $argName ); + if ( $text === false && $parts->getLength() > 0 + && ( + $this->ot['html'] + || $this->ot['pre'] + || ( $this->ot['wiki'] && $frame->isTemplate() ) + ) + ) { + # No match in frame, use the supplied default + $object = $parts->item( 0 )->getChildren(); } if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) { $error = ''; } - if ( $text === false ) { - $text = '{{{' . $frame->implode( '|', $argWithSpaces, $parts ) . '}}}'; + if ( $text === false && $object === false ) { + # No match anywhere + $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts ); } if ( $error !== false ) { $text .= $error; } + if ( $object !== false ) { + $ret = array( 'object' => $object ); + } else { + $ret = array( 'text' => $text ); + } wfProfileOut( __METHOD__ ); - return $text; + return $ret; } /** @@ -3579,29 +3199,29 @@ class Parser * This is the ghost of strip(). 
* * @param array $params Associative array of parameters: - * name DOMNode for the tag name - * attrText DOMNode for unparsed text where tag attributes are thought to be + * name PPNode for the tag name + * attr PPNode for unparsed text where tag attributes are thought to be + * attributes Optional associative array of parsed attributes * inner Contents of extension element * noClose Original text did not have a close tag * @param PPFrame $frame */ function extensionSubstitution( $params, $frame ) { global $wgRawHtml, $wgContLang; - static $n = 1; $name = $frame->expand( $params['name'] ); - $attrText = is_null( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); - $content = is_null( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); + $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] ); + $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] ); + + $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX; - $marker = "{$this->mUniqPrefix}-$name-" . sprintf('%08X', $n++) . 
$this->mMarkerSuffix; - if ( $this->ot['html'] ) { - if ( $name == '!--' ) { - return ''; - } $name = strtolower( $name ); - $params = Sanitizer::decodeTagAttributes( $attrText ); + $attributes = Sanitizer::decodeTagAttributes( $attrText ); + if ( isset( $params['attributes'] ) ) { + $attributes = $attributes + $params['attributes']; + } switch ( $name ) { case 'html': if( $wgRawHtml ) { @@ -3615,30 +3235,39 @@ class Parser break; case 'math': $output = $wgContLang->armourMath( - MathRenderer::renderMath( $content, $params ) ); + MathRenderer::renderMath( $content, $attributes ) ); break; case 'gallery': - $output = $this->renderImageGallery( $content, $params ); + $output = $this->renderImageGallery( $content, $attributes ); break; default: if( isset( $this->mTagHooks[$name] ) ) { + # Workaround for PHP bug 35229 and similar + if ( !is_callable( $this->mTagHooks[$name] ) ) { + throw new MWException( "Tag hook for $name is not callable\n" ); + } $output = call_user_func_array( $this->mTagHooks[$name], - array( $content, $params, $this ) ); + array( $content, $attributes, $this ) ); } else { throw new MWException( "Invalid call hook $name" ); } } } else { - if ( $name == '!--' ) { - $output = ''; - } else { - if ( $content === null ) { - $output = "<$name$attrText/>"; - } else { - $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] ); - $output = "<$name$attrText>$content$close"; + if ( is_null( $attrText ) ) { + $attrText = ''; + } + if ( isset( $params['attributes'] ) ) { + foreach ( $params['attributes'] as $attrName => $attrValue ) { + $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' . + htmlspecialchars( $attrValue ) . '"'; } } + if ( $content === null ) { + $output = "<$name$attrText/>"; + } else { + $close = is_null( $params['close'] ) ? 
'' : $frame->expand( $params['close'] ); + $output = "<$name$attrText>$content$close"; + } } if ( $name == 'html' || $name == 'nowiki' ) { @@ -3666,32 +3295,25 @@ class Parser } /** - * Detect __NOGALLERY__ magic word and set a placeholder + * Increment the expensive function count + * + * @return boolean False if the limit has been exceeded */ - function stripNoGallery( &$text ) { - # if the string __NOGALLERY__ (not case-sensitive) occurs in the HTML, - # do not add TOC - $mw = MagicWord::get( 'nogallery' ); - $this->mOutput->mNoGallery = $mw->matchAndRemove( $text ) ; + function incrementExpensiveFunctionCount() { + global $wgExpensiveParserFunctionLimit; + $this->mExpensiveFunctionCount++; + if($this->mExpensiveFunctionCount <= $wgExpensiveParserFunctionLimit) { + return true; + } + return false; } /** - * Find the first __TOC__ magic word and set a - * placeholder that will then be replaced by the real TOC in - * ->formatHeadings, this works because at this points real - * comments will have already been discarded by the sanitizer. - * - * Any additional __TOC__ magic words left over will be discarded - * as there can only be one TOC on the page. + * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ + * Fills $this->mDoubleUnderscores, returns the modified text */ - function stripToc( $text ) { - # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, - # do not add TOC - $mw = MagicWord::get( 'notoc' ); - if( $mw->matchAndRemove( $text ) ) { - $this->mShowToc = false; - } - + function doDoubleUnderscore( $text ) { + // The position of __TOC__ needs to be recorded $mw = MagicWord::get( 'toc' ); if( $mw->match( $text ) ) { $this->mShowToc = true; @@ -3703,6 +3325,27 @@ class Parser // Only keep the first one. 
$text = $mw->replace( '', $text ); } + + // Now match and remove the rest of them + $mwa = MagicWord::getDoubleUnderscoreArray(); + $this->mDoubleUnderscores = $mwa->matchAndRemove( $text ); + + if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) { + $this->mOutput->mNoGallery = true; + } + if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) { + $this->mShowToc = false; + } + if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) { + $this->mOutput->setProperty( 'hiddencat', 'y' ); + + $containerCategory = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( 'hidden-category-category' ) ); + if ( $containerCategory ) { + $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); + } else { + wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid title!\n" ); + } + } return $text; } @@ -3731,8 +3374,7 @@ class Parser } # Inhibit editsection links if requested in the page - $esw =& MagicWord::get( 'noeditsection' ); - if( $esw->matchAndRemove( $text ) ) { + if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { $showEditLink = 0; } @@ -3748,14 +3390,13 @@ class Parser # Allow user to stipulate that a page should have a "new section" # link added via __NEWSECTIONLINK__ - $mw =& MagicWord::get( 'newsectionlink' ); - if( $mw->matchAndRemove( $text ) ) + if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) { $this->mOutput->setNewSection( true ); + } # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, # override above conditions and always show TOC above first header - $mw =& MagicWord::get( 'forcetoc' ); - if ($mw->matchAndRemove( $text ) ) { + if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) { $this->mShowToc = true; $enoughToc = true; } @@ -3779,8 +3420,9 @@ class Parser $prevlevel = 0; $toclevel = 0; $prevtoclevel = 0; - $markerRegex = "{$this->mUniqPrefix}-h-(\d+)-{$this->mMarkerSuffix}"; + $markerRegex 
= "{$this->mUniqPrefix}-h-(\d+)-" . self::MARKER_SUFFIX; $baseTitleText = $this->mTitle->getPrefixedDBkey(); + $tocraw = array(); foreach( $matches[3] as $headline ) { $isTemplate = false; @@ -3837,6 +3479,7 @@ class Parser if($prevtoclevel < $wgMaxTocLevel) { # Unindent only if the previous toc level was shown :p $toc .= $sk->tocUnindent( $prevtoclevel - $toclevel ); + $prevtoclevel = $toclevel; } else { $toc .= $sk->tocLineEnd(); } @@ -3895,11 +3538,15 @@ class Parser # Save headline for section edit hint before it's escaped $headlineHint = $safeHeadline; $safeHeadline = Sanitizer::escapeId( $safeHeadline ); + # HTML names must be case-insensitively unique (bug 10721) + $arrayKey = strtolower( $safeHeadline ); + + # XXX : Is $refers[$headlineCount] ever accessed, actually ? $refers[$headlineCount] = $safeHeadline; # count how many in assoc. array so we can track dupes in anchors - isset( $refers[$safeHeadline] ) ? $refers[$safeHeadline]++ : $refers[$safeHeadline] = 1; - $refcount[$headlineCount] = $refers[$safeHeadline]; + isset( $refers[$arrayKey] ) ? $refers[$arrayKey]++ : $refers[$arrayKey] = 1; + $refcount[$headlineCount] = $refers[$arrayKey]; # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { @@ -3914,13 +3561,17 @@ class Parser } if( $enoughToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) { $toc .= $sk->tocLine($anchor, $tocline, $numbering, $toclevel); + $tocraw[] = array( 'toclevel' => $toclevel, 'level' => $level, 'line' => $tocline, 'number' => $numbering ); } # give headline the correct tag if( $showEditLink && $sectionIndex !== false ) { - if( $isTemplate ) - $editlink = $sk->editSectionLinkForOther($titleText, $sectionIndex); - else + if( $isTemplate ) { + # Put a T flag in the section identifier, to indicate to extractSections() + # that sections inside should be counted. 
+ $editlink = $sk->editSectionLinkForOther($titleText, "T-$sectionIndex"); + } else { $editlink = $sk->editSectionLink($this->mTitle, $sectionIndex, $headlineHint); + } } else { $editlink = ''; } @@ -3929,11 +3580,13 @@ class Parser $headlineCount++; } + $this->mOutput->setSections( $tocraw ); + # Never ever show TOC if no headers if( $numVisible < 1 ) { $enoughToc = false; } - + if( $enoughToc ) { if( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { $toc .= $sk->tocUnindent( $prevtoclevel - 1 ); @@ -3987,8 +3640,8 @@ class Parser */ function preSaveTransform( $text, &$title, $user, $options, $clearState = true ) { $this->mOptions = $options; - $this->mTitle =& $title; - $this->setOutputType( OT_WIKI ); + $this->setTitle( $title ); + $this->setOutputType( self::OT_WIKI ); if ( $clearState ) { $this->clearState(); @@ -4014,24 +3667,25 @@ class Parser * the database, we use $wgContLang here in order to give * everyone the same signature and use the default one rather * than the one selected in each user's preferences. + * + * (see also bug 12815) */ + $ts = $this->mOptions->getTimestamp(); + $tz = 'UTC'; if ( isset( $wgLocaltimezone ) ) { + $unixts = wfTimestamp( TS_UNIX, $ts ); $oldtz = getenv( 'TZ' ); putenv( 'TZ='.$wgLocaltimezone ); - } - $d = $wgContLang->timeanddate( date( 'YmdHis' ), false, false) . - ' (' . date( 'T' ) . ')'; - if ( isset( $wgLocaltimezone ) ) { + $ts = date( 'YmdHis', $unixts ); + $tz = date( 'T', $unixts ); # might vary on DST changeover! putenv( 'TZ='.$oldtz ); } + $d = $wgContLang->timeanddate( $ts, false, false ) . " ($tz)"; # Variable replacement # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags $text = $this->replaceVariables( $text ); - # Strip out etc. 
added via replaceVariables - #$text = $this->strip( $text, $this->mStripState, false, array( 'gallery' ) ); - # Signatures $sigText = $this->getUserSig( $user ); $text = strtr( $text, array( @@ -4081,11 +3735,11 @@ class Parser */ function getUserSig( &$user ) { global $wgMaxSigChars; - + $username = $user->getName(); $nickname = $user->getOption( 'nickname' ); $nickname = $nickname === '' ? $username : $nickname; - + if( mb_strlen( $nickname ) > $wgMaxSigChars ) { $nickname = $username; wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); @@ -4108,9 +3762,9 @@ class Parser $userText = wfEscapeWikiText( $username ); $nickText = wfEscapeWikiText( $nickname ); if ( $user->isAnon() ) { - return wfMsgForContent( 'signature-anon', $userText, $nickText ); + return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); } else { - return wfMsgForContent( 'signature', $userText, $nickText ); + return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); } } @@ -4135,18 +3789,30 @@ class Parser * @return string Signature text */ function cleanSig( $text, $parsing = false ) { - global $wgTitle; - $this->startExternalParse( $wgTitle, new ParserOptions(), $parsing ? OT_WIKI : OT_MSG ); + if ( !$parsing ) { + global $wgTitle; + $this->clearState(); + $this->setTitle( $wgTitle ); + $this->mOptions = new ParserOptions; + $this->setOutputType = self::OT_PREPROCESS; + } + # FIXME: regex doesn't respect extension tags or nowiki + # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase(); $substText = '{{' . 
$substWord->getSynonym( 0 ); $text = preg_replace( $substRegex, $substText, $text ); $text = $this->cleanSigInSig( $text ); - $text = $this->replaceVariables( $text ); + $dom = $this->preprocessToDom( $text ); + $frame = $this->getPreprocessor()->newFrame(); + $text = $frame->expand( $dom ); + + if ( !$parsing ) { + $text = $this->mStripState->unstripBoth( $text ); + } - $this->clearState(); return $text; } @@ -4166,7 +3832,7 @@ class Parser * @public */ function startExternalParse( &$title, $options, $outputType, $clearState = true ) { - $this->mTitle =& $title; + $this->setTitle( $title ); $this->mOptions = $options; $this->setOutputType( $outputType ); if ( $clearState ) { @@ -4175,16 +3841,11 @@ class Parser } /** - * Transform a MediaWiki message by replacing magic variables. + * Wrapper for preprocess() * - * For some unknown reason, it also expands templates, but only to the - * first recursion level. This is wrong and broken, probably introduced - * accidentally during refactoring, but probably relied upon by thousands - * of users. - * - * @param string $text the text to transform + * @param string $text the text to preprocess * @param ParserOptions $options options - * @return string the text with variables substituted + * @return string * @public */ function transformMsg( $text, $options ) { @@ -4200,16 +3861,7 @@ class Parser $executing = true; wfProfileIn($fname); - - if ( $wgTitle && !( $wgTitle instanceof FakeTitle ) ) { - $this->mTitle = $wgTitle; - } else { - $this->mTitle = Title::newFromText('msg'); - } - $this->mOptions = $options; - $this->setOutputType( OT_MSG ); - $this->clearState(); - $text = $this->replaceVariables( $text ); + $text = $this->preprocess( $text, $wgTitle, $options ); $executing = false; wfProfileOut($fname); @@ -4235,7 +3887,9 @@ class Parser $tag = strtolower( $tag ); $oldVal = isset( $this->mTagHooks[$tag] ) ? 
$this->mTagHooks[$tag] : null; $this->mTagHooks[$tag] = $callback; - $this->mStripList[] = $tag; + if( !in_array( $tag, $this->mStripList ) ) { + $this->mStripList[] = $tag; + } return $oldVal; } @@ -4248,6 +3902,14 @@ class Parser return $oldVal; } + /** + * Remove all tag hooks + */ + function clearTagHooks() { + $this->mTagHooks = array(); + $this->mStripList = $this->mDefaultStripList; + } + /** * Create a function, e.g. {{sum:1|2|3}} * The callback function should have the form: @@ -4259,8 +3921,6 @@ class Parser * found The text returned is valid, stop processing the template. This * is on by default. * nowiki Wiki markup in the return value should be escaped - * noparse Unsafe HTML tags should not be stripped, etc. - * noargs Don't replace triple-brace arguments in the return value * isHTML The returned text is HTML, armour it against wikitext transformation * * @public @@ -4314,10 +3974,7 @@ class Parser /** * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() - * Returns an array of links found, indexed by PDBK: - * 0 - broken - * 1 - normal link - * 2 - stub + * Returns an array of link CSS classes, indexed by PDBK. * $options is a bit field, RLH_FOR_UPDATE to select for update */ function replaceLinkHolders( &$text, $options = 0 ) { @@ -4329,8 +3986,9 @@ class Parser $pdbks = array(); $colours = array(); + $linkcolour_ids = array(); $sk = $this->mOptions->getSkin(); - $linkCache =& LinkCache::singleton(); + $linkCache = LinkCache::singleton(); if ( !empty( $this->mLinkHolders['namespaces'] ) ) { wfProfileIn( $fname.'-check' ); @@ -4357,22 +4015,19 @@ class Parser # Check if it's a static known link, e.g. 
interwiki if ( $title->isAlwaysKnown() ) { - $colours[$pdbk] = 1; + $colours[$pdbk] = ''; } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) { - $colours[$pdbk] = 1; + $colours[$pdbk] = ''; $this->mOutput->addLink( $title, $id ); } elseif ( $linkCache->isBadLink( $pdbk ) ) { - $colours[$pdbk] = 0; + $colours[$pdbk] = 'new'; } elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) { - $colours[$pdbk] = 0; + $colours[$pdbk] = 'new'; } else { # Not in the link cache, add it to the query if ( !isset( $current ) ) { $current = $ns; - $query = "SELECT page_id, page_namespace, page_title"; - if ( $threshold > 0 ) { - $query .= ', page_len, page_is_redirect'; - } + $query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; $query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN("; } elseif ( $current != $ns ) { $current = $ns; @@ -4394,20 +4049,17 @@ class Parser # Fetch data and form into an associative array # non-existent = broken - # 1 = known - # 2 = stub while ( $s = $dbr->fetchObject($res) ) { $title = Title::makeTitle( $s->page_namespace, $s->page_title ); $pdbk = $title->getPrefixedDBkey(); - $linkCache->addGoodLinkObj( $s->page_id, $title ); + $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect ); $this->mOutput->addLink( $title, $s->page_id ); - - $colours[$pdbk] = ( $threshold == 0 || ( - $s->page_len >= $threshold || # always true if $threshold <= 0 - $s->page_is_redirect || - !Namespace::isContent( $s->page_namespace ) ) - ? 
1 : 2 ); + $colours[$pdbk] = $sk->getLinkColour( $title, $threshold ); + //add id to the extension todolist + $linkcolour_ids[$s->page_id] = $pdbk; } + //pass an array of page_ids to an extension + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } wfProfileOut( $fname.'-check' ); @@ -4463,10 +4115,7 @@ class Parser // construct query $titleClause = $linkBatch->constructSet('page', $dbr); - $variantQuery = "SELECT page_id, page_namespace, page_title"; - if ( $threshold > 0 ) { - $variantQuery .= ', page_len, page_is_redirect'; - } + $variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len"; $variantQuery .= " FROM $page WHERE $titleClause"; if ( $options & RLH_FOR_UPDATE ) { @@ -4485,7 +4134,7 @@ class Parser $holderKeys = array(); if(isset($variantMap[$varPdbk])){ $holderKeys = $variantMap[$varPdbk]; - $linkCache->addGoodLinkObj( $s->page_id, $variantTitle ); + $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect ); $this->mOutput->addLink( $variantTitle, $s->page_id ); } @@ -4503,18 +4152,10 @@ class Parser // set pdbk and colour $pdbks[$key] = $varPdbk; - if ( $threshold > 0 ) { - $size = $s->page_len; - if ( $s->page_is_redirect || $s->page_namespace != 0 || $size >= $threshold ) { - $colours[$varPdbk] = 1; - } else { - $colours[$varPdbk] = 2; - } - } - else { - $colours[$varPdbk] = 1; - } + $colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold ); + $linkcolour_ids[$s->page_id] = $pdbk; } + wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) ); } // check if the object is a variant of a category @@ -4547,19 +4188,15 @@ class Parser $pdbk = $pdbks[$key]; $searchkey = ""; $title = $this->mLinkHolders['titles'][$key]; - if ( empty( $colours[$pdbk] ) ) { + if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) { $linkCache->addBadLinkObj( $title ); - $colours[$pdbk] = 0; + $colours[$pdbk] = 'new'; $this->mOutput->addLink( $title, 0 ); 
$replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title, $this->mLinkHolders['texts'][$key], $this->mLinkHolders['queries'][$key] ); - } elseif ( $colours[$pdbk] == 1 ) { - $replacePairs[$searchkey] = $sk->makeKnownLinkObj( $title, - $this->mLinkHolders['texts'][$key], - $this->mLinkHolders['queries'][$key] ); - } elseif ( $colours[$pdbk] == 2 ) { - $replacePairs[$searchkey] = $sk->makeStubLinkObj( $title, + } else { + $replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk], $this->mLinkHolders['texts'][$key], $this->mLinkHolders['queries'][$key] ); } @@ -4687,7 +4324,7 @@ class Parser if( isset( $params['heights'] ) ) { $ig->setHeights( $params['heights'] ); } - + wfRunHooks( 'BeforeParserrenderImageGallery', array( &$this, &$ig ) ); $lines = explode( "\n", $text ); @@ -4712,13 +4349,7 @@ class Parser $label = ''; } - $pout = $this->parse( $label, - $this->mTitle, - $this->mOptions, - false, // Strip whitespace...? - false // Don't clear state! - ); - $html = $pout->getText(); + $html = $this->recursiveTagParse( trim( $label ) ); $ig->add( $nt, $html ); @@ -4740,9 +4371,9 @@ class Parser // Initialise static lists static $internalParamNames = array( 'horizAlign' => array( 'left', 'right', 'center', 'none' ), - 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', + 'vertAlign' => array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ), - 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', + 'frame' => array( 'thumbnail', 'manualthumb', 'framed', 'frameless', 'upright', 'border' ), ); static $internalParamMap; @@ -4774,8 +4405,6 @@ class Parser * Parse image options text and use it to make an image */ function makeImage( $title, $options ) { - # @TODO: let the MediaHandler specify its transform parameters - # # Check if the options text is of the form "options|alt text" # Options are: # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on 
lang @@ -4797,7 +4426,7 @@ class Parser # * middle # * bottom # * text-bottom - + $parts = array_map( 'trim', explode( '|', $options) ); $sk = $this->mOptions->getSkin(); @@ -4817,25 +4446,61 @@ class Parser # Process the input parameters $caption = ''; - $params = array( 'frame' => array(), 'handler' => array(), + $params = array( 'frame' => array(), 'handler' => array(), 'horizAlign' => array(), 'vertAlign' => array() ); foreach( $parts as $part ) { list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part ); - if ( isset( $paramMap[$magicName] ) ) { + $validated = false; + if( isset( $paramMap[$magicName] ) ) { list( $type, $paramName ) = $paramMap[$magicName]; - $params[$type][$paramName] = $value; - + // Special case; width and height come in one variable together if( $type == 'handler' && $paramName == 'width' ) { $m = array(); - if ( preg_match( '/^([0-9]*)x([0-9]*)$/', $value, $m ) ) { - $params[$type]['width'] = intval( $m[1] ); - $params[$type]['height'] = intval( $m[2] ); + # (bug 13500) In both cases (width/height and width only), + # permit trailing "px" for backward compatibility. 
+ if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) { + $width = intval( $m[1] ); + $height = intval( $m[2] ); + if ( $handler->validateParam( 'width', $width ) ) { + $params[$type]['width'] = $width; + $validated = true; + } + if ( $handler->validateParam( 'height', $height ) ) { + $params[$type]['height'] = $height; + $validated = true; + } + } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) { + $width = intval( $value ); + if ( $handler->validateParam( 'width', $width ) ) { + $params[$type]['width'] = $width; + $validated = true; + } + } // else no validation -- bug 13436 + } else { + if ( $type == 'handler' ) { + # Validate handler parameter + $validated = $handler->validateParam( $paramName, $value ); } else { - $params[$type]['width'] = intval( $value ); + # Validate internal parameters + switch( $paramName ) { + case "manualthumb": + /// @fixme - possibly check validity here? + /// downstream behavior seems odd with missing manual thumbs. + $validated = true; + break; + default: + // Most other things appear to be empty or numeric... 
+ $validated = ( $value === false || is_numeric( trim( $value ) ) ); + } + } + + if ( $validated ) { + $params[$type][$paramName] = $value; } } - } else { + } + if ( !$validated ) { $caption = $part; } } @@ -4848,15 +4513,6 @@ class Parser $params['frame']['valign'] = key( $params['vertAlign'] ); } - # Validate the handler parameters - if ( $handler ) { - foreach ( $params['handler'] as $name => $value ) { - if ( !$handler->validateParam( $name, $value ) ) { - unset( $params['handler'][$name] ); - } - } - } - # Strip bad stuff out of the alt text $alt = $this->replaceLinkHoldersText( $caption ); @@ -4869,8 +4525,10 @@ class Parser $params['frame']['alt'] = $alt; $params['frame']['caption'] = $caption; + wfRunHooks( 'ParserMakeImageParams', array( $title, $file, &$params ) ); + # Linker does the rest - $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'] ); + $ret = $sk->makeImageLink2( $title, $file, $params['frame'], $params['handler'], $time ); # Give the handler a chance to modify the parser object if ( $handler ) { @@ -4926,52 +4584,67 @@ class Parser * * External callers should use the getSection and replaceSection methods. * - * @param $text Page wikitext - * @param $section Numbered section. 0 pulls the text before the first - * heading; other numbers will pull the given section - * along with its lower-level subsections. If the section is - * not found, $mode=get will return $newtext, and - * $mode=replace will return $text. - * @param $mode One of "get" or "replace" - * @param $newText Replacement text for section data. + * @param string $text Page wikitext + * @param string $section A section identifier string of the form: + * - - ... -
+ * + * Currently the only recognised flag is "T", which means the target section number + * was derived during a template inclusion parse, in other words this is a template + * section edit link. If no flags are given, it was an ordinary section edit link. + * This flag is required to avoid a section numbering mismatch when a section is + * enclosed by (bug 6563). + * + * The section number 0 pulls the text before the first heading; other numbers will + * pull the given section along with its lower-level subsections. If the section is + * not found, $mode=get will return $newtext, and $mode=replace will return $text. + * + * @param string $mode One of "get" or "replace" + * @param string $newText Replacement text for section data. * @return string for "get", the extracted section text. * for "replace", the whole page with the section replaced. */ private function extractSections( $text, $section, $mode, $newText='' ) { + global $wgTitle; $this->clearState(); + $this->setTitle( $wgTitle ); // not generally used but removes an ugly failure mode $this->mOptions = new ParserOptions; - $this->setOutputType( OT_WIKI ); - $curIndex = 0; + $this->setOutputType( self::OT_WIKI ); $outText = ''; - $frame = new PPFrame( $this ); - + $frame = $this->getPreprocessor()->newFrame(); + + // Process section extraction flags + $flags = 0; + $sectionParts = explode( '-', $section ); + $sectionIndex = array_pop( $sectionParts ); + foreach ( $sectionParts as $part ) { + if ( $part == 'T' ) { + $flags |= self::PTD_FOR_INCLUSION; + } + } // Preprocess the text - $dom = $this->preprocessToDom( $text ); - $root = $dom->documentElement; + $root = $this->preprocessToDom( $text, $flags ); // nodes indicate section breaks // They can only occur at the top level, so we can find them by iterating the root's children - $node = $root->firstChild; + $node = $root->getFirstChild(); // Find the target section - if ( $section == 0 ) { + if ( $sectionIndex == 0 ) { // Section zero doesn't nest, 
level=big $targetLevel = 1000; } else { - while ( $node ) { - if ( $node->nodeName == 'h' ) { - if ( $curIndex + 1 == $section ) { + while ( $node ) { + if ( $node->getName() == 'h' ) { + $bits = $node->splitHeading(); + if ( $bits['i'] == $sectionIndex ) { + $targetLevel = $bits['level']; break; } - $curIndex++; } if ( $mode == 'replace' ) { - $outText .= $frame->expand( $node ); + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } - $node = $node->nextSibling; - } - if ( $node ) { - $targetLevel = $node->getAttribute( 'level' ); + $node = $node->getNextSibling(); } } @@ -4986,34 +4659,34 @@ class Parser // Find the end of the section, including nested sections do { - if ( $node->nodeName == 'h' ) { - $curIndex++; - $curLevel = $node->getAttribute( 'level' ); - if ( $curIndex != $section && $curLevel <= $targetLevel ) { + if ( $node->getName() == 'h' ) { + $bits = $node->splitHeading(); + $curLevel = $bits['level']; + if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { break; } } if ( $mode == 'get' ) { - $outText .= $frame->expand( $node ); + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); } - $node = $node->nextSibling; + $node = $node->getNextSibling(); } while ( $node ); - + // Write out the remainder (in replace mode only) if ( $mode == 'replace' ) { // Output the replacement text - // Add two newlines on -- trailing whitespace in $newText is conventionally + // Add two newlines on -- trailing whitespace in $newText is conventionally // stripped by the editor, so we need both newlines to restore the paragraph gap $outText .= $newText . 
"\n\n"; while ( $node ) { - $outText .= $frame->expand( $node ); - $node = $node->nextSibling; + $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); + $node = $node->getNextSibling(); } } if ( is_string( $outText ) ) { // Re-insert stripped tags - $outText = trim( $this->mStripState->unstripBoth( $outText ) ); + $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); } return $outText; @@ -5026,9 +4699,9 @@ class Parser * * If a section contains subsections, these are also returned. * - * @param $text String: text to look in - * @param $section Integer: section number - * @param $deftext: default to return if section is not found + * @param string $text text to look in + * @param string $section section identifier + * @param string $deftext default to return if section is not found * @return string text of the requested section */ public function getSection( $text, $section, $deftext='' ) { @@ -5096,8 +4769,8 @@ class Parser } /** - * Try to guess the section anchor name based on a wikitext fragment - * presumably extracted from a heading, for example "Header" from + * Try to guess the section anchor name based on a wikitext fragment + * presumably extracted from a heading, for example "Header" from * "== Header ==". */ public function guessSectionNameFromWikiText( $text ) { @@ -5120,14 +4793,14 @@ class Parser /** * Strips a text string of wikitext for use in a section anchor - * + * * Accepts a text string and then removes all wikitext from the * string and leaves only the resultant text (i.e. the result of * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended * to create valid section anchors by mimicing the output of the * parser when headings are parsed. 
- * + * * @param $text string Text string to be stripped of wikitext * for use in a Section anchor * @return Filtered text string @@ -5136,43 +4809,78 @@ class Parser # Strip internal link markup $text = preg_replace('/\[\[:?([^[|]+)\|([^[]+)\]\]/','$2',$text); $text = preg_replace('/\[\[:?([^[]+)\|?\]\]/','$1',$text); - + # Strip external link markup (FIXME: Not Tolerant to blank link text # I.E. [http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace('/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/','$2',$text); - + # Parse wikitext quotes (italics & bold) $text = $this->doQuotes($text); - + # Strip HTML tags $text = StringUtils::delimiterReplace( '<', '>', '', $text ); return $text; } + function srvus( $text ) { + return $this->testSrvus( $text, $this->mOutputType ); + } + /** * strip/replaceVariables/unstrip for preprocessor regression testing */ - function srvus( $text ) { + function testSrvus( $text, $title, $options, $outputType = self::OT_HTML ) { + $this->clearState(); + if ( ! ( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); + } + $this->mTitle = $title; + $this->mOptions = $options; + $this->setOutputType( $outputType ); $text = $this->replaceVariables( $text ); $text = $this->mStripState->unstripBoth( $text ); + $text = Sanitizer::removeHTMLtags( $text ); return $text; } -} -/** - * @todo document, briefly. - * @addtogroup Parser - */ -class OnlyIncludeReplacer { - var $output = ''; + function testPst( $text, $title, $options ) { + global $wgUser; + if ( ! 
( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); + } + return $this->preSaveTransform( $text, $title, $wgUser, $options ); + } - function replace( $matches ) { - if ( substr( $matches[1], -1 ) == "\n" ) { - $this->output .= substr( $matches[1], 0, -1 ); - } else { - $this->output .= $matches[1]; + function testPreprocess( $text, $title, $options ) { + if ( ! ( $title instanceof Title ) ) { + $title = Title::newFromText( $title ); } + return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); + } + + function markerSkipCallback( $s, $callback ) { + $i = 0; + $out = ''; + while ( $i < strlen( $s ) ) { + $markerStart = strpos( $s, $this->mUniqPrefix, $i ); + if ( $markerStart === false ) { + $out .= call_user_func( $callback, substr( $s, $i ) ); + break; + } else { + $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); + $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart ); + if ( $markerEnd === false ) { + $out .= substr( $s, $markerStart ); + break; + } else { + $markerEnd += strlen( self::MARKER_SUFFIX ); + $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); + $i = $markerEnd; + } + } + } + return $out; } } @@ -5221,234 +4929,17 @@ class StripState { } /** - * An expansion frame, used as a context to expand the result of preprocessToDom() + * @todo document, briefly. + * @addtogroup Parser */ -class PPFrame { - var $parser, $title; - - const NO_ARGS = 1; - const NO_TEMPLATES = 2; - - /** - * Construct a new preprocessor frame. 
- * @param Parser $parser The parent parser - * @param Title $title The context title, or false if there isn't one - */ - function __construct( $parser ) { - $this->parser = $parser; - $this->title = $parser->mTitle; - } - - /** - * Create a new child frame - * $args is optionally a DOMNodeList containing the template arguments - */ - function newChild( $args = false, $title = false ) { - $assocArgs = array(); - if ( $title === false ) { - $title = $this->title; - } - if ( $args !== false ) { - $xpath = false; - foreach ( $args as $arg ) { - if ( !$xpath ) { - $xpath = new DOMXPath( $arg->ownerDocument ); - } - - $nameNodes = $xpath->query( 'name', $arg ); - if ( $nameNodes->item( 0 )->hasAttributes() ) { - // Numbered parameter - $name = $nameNodes->item( 0 )->attributes->getNamedItem( 'index' )->textContent; - } else { - // Named parameter - $name = $this->expand( $nameNodes->item( 0 ) ); - } - - $value = $xpath->query( 'value', $arg ); - $assocArgs[$name] = $value->item( 0 ); - } - } - return new PPTemplateFrame( $this->parser, $this, $assocArgs, $title ); - } - - /** - * Expand a DOMNode describing a preprocessed document into plain wikitext, - * using the current context - * @param $root the node - */ - function expand( $root, $shallowFlags = 0, $deepFlags = 0 ) { - if ( is_string( $root ) ) { - return $root; - } - - if ( $this->parser->ot['html'] - && ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount ) - { - return $this->parser->insertStripItem( '' ); - } - $flags = $shallowFlags | $deepFlags; +class OnlyIncludeReplacer { + var $output = ''; - if ( is_array( $root ) ) { - $s = ''; - foreach ( $root as $node ) { - $s .= $this->expand( $node, 0, $deepFlags ); - } - } elseif ( $root instanceof DOMNodeList ) { - $s = ''; - foreach ( $root as $node ) { - $s .= $this->expand( $node, 0, $deepFlags ); - } - } elseif ( $root instanceof DOMNode ) { - if ( $root->nodeType == XML_TEXT_NODE ) { - $s = $root->nodeValue; - } elseif ( $root->nodeName 
== 'template' ) { - # Double-brace expansion - $xpath = new DOMXPath( $root->ownerDocument ); - $titles = $xpath->query( 'title', $root ); - $title = $titles->item( 0 ); - $parts = $xpath->query( 'part', $root ); - if ( $flags & self::NO_TEMPLATES ) { - $s = '{{' . $this->implodeWithFlags( '|', 0, $deepFlags, $title, $parts ) . '}}'; - } else { - $lineStart = $root->getAttribute( 'lineStart' ); - $params = array( - 'title' => $title, - 'parts' => $parts, - 'lineStart' => $lineStart, - 'text' => 'FIXME' ); - $s = $this->parser->braceSubstitution( $params, $this ); - } - } elseif ( $root->nodeName == 'tplarg' ) { - # Triple-brace expansion - $xpath = new DOMXPath( $root->ownerDocument ); - $titles = $xpath->query( 'title', $root ); - $title = $titles->item( 0 ); - $parts = $xpath->query( 'part', $root ); - if ( $flags & self::NO_ARGS || $this->parser->ot['msg'] ) { - $s = '{{{' . $this->implode( '|', 0, $deepFlags, $title, $parts ) . '}}}'; - } else { - $params = array( 'title' => $title, 'parts' => $parts, 'text' => 'FIXME' ); - $s = $this->parser->argSubstitution( $params, $this ); - } - } elseif ( $root->nodeName == 'ext' ) { - # Extension tag - $xpath = new DOMXPath( $root->ownerDocument ); - $names = $xpath->query( 'name', $root ); - $attrs = $xpath->query( 'attr', $root ); - $inners = $xpath->query( 'inner', $root ); - $closes = $xpath->query( 'close', $root ); - $params = array( - 'name' => $names->item( 0 ), - 'attr' => $attrs->length > 0 ? $attrs->item( 0 ) : null, - 'inner' => $inners->length > 0 ? $inners->item( 0 ) : null, - 'close' => $closes->length > 0 ? 
$closes->item( 0 ) : null, - ); - $s = $this->parser->extensionSubstitution( $params, $this ); - } elseif ( $root->nodeName == 'h' ) { - # Heading - $s = $this->expand( $root->childNodes, 0, $deepFlags ); - - if ( $this->parser->ot['html'] ) { - # Insert heading index marker - $headingIndex = $root->getAttribute( 'i' ); - $titleText = $this->title->getPrefixedDBkey(); - $this->parser->mHeadings[] = array( $titleText, $headingIndex ); - $serial = count( $this->parser->mHeadings ) - 1; - $marker = "{$this->parser->mUniqPrefix}-h-$serial-{$this->parser->mMarkerSuffix}"; - $count = $root->getAttribute( 'level' ); - - // FIXME: bug-for-bug with old parser - // Lose whitespace for no apparent reason - // Remove this after differential testing is done - if ( true ) { - // Good version - $s = substr( $s, 0, $count ) . $marker . substr( $s, $count ); - } else { - // Bad version - if ( preg_match( '/^(={1,6})(.*?)(={1,6})\s*?$/', $s, $m ) ) { - if ( $m[2] != '' ) { - $s = $m[1] . $marker . $m[2] . $m[3]; - } - } - } - $this->parser->mStripState->general->setPair( $marker, '' ); - } - } else { - # Generic recursive expansion - $s = ''; - for ( $node = $root->firstChild; $node; $node = $node->nextSibling ) { - if ( $node->nodeType == XML_TEXT_NODE ) { - $s .= $node->nodeValue; - } elseif ( $node->nodeType == XML_ELEMENT_NODE ) { - $s .= $this->expand( $node, 0, $deepFlags ); - } - } - } + function replace( $matches ) { + if ( substr( $matches[1], -1 ) == "\n" ) { + $this->output .= substr( $matches[1], 0, -1 ); } else { - throw new MWException( __METHOD__.': Invalid parameter type' ); - } - return $s; - } - - function implodeWithFlags( $sep, $shallowFlags, $deepFlags /*, ... 
*/ ) { - $args = array_slice( func_get_args(), 3 ); - - $first = true; - $s = ''; - foreach ( $args as $root ) { - if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) { - $root = array( $root ); - } - foreach ( $root as $node ) { - if ( $first ) { - $first = false; - } else { - $s .= $sep; - } - $s .= $this->expand( $node, $shallowFlags, $deepFlags ); - } - } - return $s; - } - - function implode( $sep /*, ... */ ) { - $args = func_get_args(); - $args = array_merge( array_slice( $args, 0, 1 ), array( 0, 0 ), array_slice( $args, 1 ) ); - return call_user_func_array( array( $this, 'implodeWithFlags' ), $args ); - } - - function __toString() { - return 'frame{}'; - } -} - -/** - * Expansion frame with template arguments - */ -class PPTemplateFrame extends PPFrame { - public $parser, $args, $parent, $serial; - - function __construct( $parser, $parent = false, $args = array(), $title = false ) { - $this->parser = $parser; - $this->parent = $parent; - $this->args = $args; - $this->title = $title; - } - - function __toString() { - $s = 'tplframe{'; - $first = true; - foreach ( $this->args as $name => $value ) { - if ( $first ) { - $first = false; - } else { - $s .= ', '; - } - $s .= "\"$name\":\"" . - str_replace( '"', '\\"', $value->ownerDocument->saveXML( $value ) ) . '"'; + $this->output .= $matches[1]; } - $s .= '}'; - return $s; } } -