Make RECOVER_ORIG preserve extension tags
[lhc/web/wiklou.git] / includes / parser / Parser.php
index 11a73f3..66cfd55 100644 (file)
@@ -119,203 +119,102 @@ class Parser {
        const TOC_START = '<mw:toc>';
        const TOC_END = '</mw:toc>';
 
-       # Persistent
+       # Persistent:
+       var $mTagHooks = array();
+       var $mTransparentTagHooks = array();
+       var $mFunctionHooks = array();
+       var $mFunctionSynonyms = array( 0 => array(), 1 => array() );
+       var $mFunctionTagHooks = array();
+       var $mStripList = array();
+       var $mDefaultStripList = array();
+       var $mVarCache = array();
+       var $mImageParams = array();
+       var $mImageParamsMagicArray = array();
+       var $mMarkerIndex = 0;
+       var $mFirstCall = true;
 
-       /** @var array */
-       public $mTagHooks = array();
-
-       /** @var array */
-       public $mTransparentTagHooks = array();
-
-       /** @var array */
-       public $mFunctionHooks = array();
-
-       /** @var array */
-       protected $mFunctionSynonyms = array( 0 => array(), 1 => array() );
-
-       /** @var array */
-       protected $mFunctionTagHooks = array();
-
-       /** @var array */
-       protected $mStripList = array();
+       # Initialised by initialiseVariables()
 
        /**
-        * @var array
-        * @todo Unused?
+        * @var MagicWordArray
         */
-       private $mDefaultStripList = array();
+       var $mVariables;
 
-       /** @var array */
-       protected $mVarCache = array();
-
-       /** @var array */
-       protected $mImageParams = array();
-
-       /** @var array */
-       protected $mImageParamsMagicArray = array();
-
-       /** @var int */
-       public $mMarkerIndex = 0;
-
-       /** @var bool */
-       protected $mFirstCall = true;
-
-       # Initialised by initialiseVariables()
-
-       /** @var MagicWordArray */
-       public $mVariables;
-
-       /** @var MagicWordArray */
-       protected $mSubstWords;
-
-       # Initialised in constructor
-
-       /** @var array */
-       protected $mConf;
-
-       /** @var Parser */
-       public $mPreprocessor;
-
-       /** @var string */
-       protected $mExtLinkBracketedRegex;
-
-       /** @var string */
-       protected $mUrlProtocols;
+       /**
+        * @var MagicWordArray
+        */
+       var $mSubstWords;
+       var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor
 
        # Cleared with clearState():
-
-       /** @var ParserOutput */
-       public $mOutput;
-
-       /** @var int */
-       protected $mAutonumber;
-
-       /** @var bool */
-       protected $mDTopen;
-
-       /** @var StripState */
-       public $mStripState;
-
        /**
-        * @var array
-        * @todo Unused?
+        * @var ParserOutput
         */
-       private $mIncludeCount;
+       var $mOutput;
+       var $mAutonumber, $mDTopen;
 
        /**
-        * @var bool
-        * @todo Unused?
+        * @var StripState
         */
-       private $mArgStack;
-
-       /** @var string */
-       protected $mLastSection;
-
-       /** @var bool */
-       protected $mInPre;
-
-       /** @var LinkHolderArray */
-       protected $mLinkHolders;
-
-       /** @var int */
-       protected $mLinkID;
-
-       /** @var array */
-       protected $mIncludeSizes;
-
-       /** @var int */
-       public $mPPNodeCount;
-
-       /** @var int */
-       public $mGeneratedPPNodeCount;
-
-       /** @var int */
-       public $mHighestExpansionDepth;
-
-       /** @var bool|string */
-       protected $mDefaultSort;
+       var $mStripState;
 
-       /** @var array Empty-frame expansion cache */
-       protected $mTplExpandCache;
-
-       /** @var array */
-       protected $mTplRedirCache;
-
-       /** @var array */
-       protected $mTplDomCache;
-
-       /** @var array */
-       public $mHeadings;
-
-       /** @var array */
-       protected $mDoubleUnderscores;
-
-       /** @var int Number of expensive parser function calls */
-       protected $mExpensiveFunctionCount;
-
-       /** @var bool */
-       protected $mShowToc;
+       var $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
+       /**
+        * @var LinkHolderArray
+        */
+       var $mLinkHolders;
 
-       /** @var bool */
-       protected $mForceTocPosition;
+       var $mLinkID;
+       var $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
+       var $mDefaultSort;
+       var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
+       var $mExpensiveFunctionCount; # number of expensive parser function calls
+       var $mShowToc, $mForceTocPosition;
 
-       /** @var User User object; only used when doing pre-save transform */
-       protected $mUser;
+       /**
+        * @var User
+        */
+       var $mUser; # User object; only used when doing pre-save transform
 
        # Temporary
        # These are variables reset at least once per parse regardless of $clearState
 
-       /** @var ParserOptions */
-       public $mOptions;
-
-       /** @var Title Title context, used for self-link rendering and similar things */
-       public $mTitle;
-
-       /** @var array Shortcut alias, see setOutputType() */
-       public $ot;
-
-       /** @var string The timestamp of the specified revision ID */
-       public $mRevisionTimestamp;
-
-       /** @var string */
-       public $mUniqPrefix;
-
        /**
-        * @var boolean Recursive call protection.
-        * This variable should be treated as if it were private.
+        * @var ParserOptions
         */
-       public $mInParse = false;
-
-       /** @var int Output type, one of the OT_xxx constants */
-       protected $mOutputType;
-
-       /** @var Revision The revision object of the specified revision ID */
-       protected $mRevisionObject;
+       var $mOptions;
 
-       /** @var int ID to display in {{REVISIONID}} tags */
-       protected $mRevisionId;
-
-       /** @var string User to display in {{REVISIONUSER}} tag */
-       protected $mRevisionUser;
-
-       /** @var int Size to display in {{REVISIONSIZE}} variable */
-       protected $mRevisionSize;
+       /**
+        * @var Title
+        */
+       var $mTitle;        # Title context, used for self-link rendering and similar things
+       var $mOutputType;   # Output type, one of the OT_xxx constants
+       var $ot;            # Shortcut alias, see setOutputType()
+       var $mRevisionObject; # The revision object of the specified revision ID
+       var $mRevisionId;   # ID to display in {{REVISIONID}} tags
+       var $mRevisionTimestamp; # The timestamp of the specified revision ID
+       var $mRevisionUser; # User to display in {{REVISIONUSER}} tag
+       var $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
+       var $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
+       var $mInputSize = false; # For {{PAGESIZE}} on current page.
 
-       /** @var bool|int For {{PAGESIZE}} on current page. */
-       protected $mInputSize = false;
+       /**
+        * @var string
+        */
+       var $mUniqPrefix;
 
        /**
         * @var array Array with the language name of each language link (i.e. the
         * interwiki prefix) in the key, value arbitrary. Used to avoid sending
         * duplicate language links to the ParserOutput.
         */
-       protected $mLangLinkLanguages;
+       var $mLangLinkLanguages;
 
        /**
-        * @var int The revision ID which was used to fetch the timestamp
-        * @todo Unused?
+        * @var boolean Recursive call protection.
+        * This variable should be treated as if it were private.
         */
-       private $mRevIdForTs;
+       public $mInParse = false;
 
        /**
         * @param array $conf
@@ -421,7 +320,7 @@ class Parser {
                $this->mStripState = new StripState( $this->mUniqPrefix );
 
                # Clear these on every parse, bug 4549
-               $this->mTplExpandCache = $this->mTplRedirCache = $this->mTplDomCache = array();
+               $this->mTplRedirCache = $this->mTplDomCache = array();
 
                $this->mShowToc = true;
                $this->mForceTocPosition = false;
@@ -718,13 +617,15 @@ class Parser {
        /**
         * Expand templates and variables in the text, producing valid, static wikitext.
         * Also removes comments.
+        * Do not call this function recursively.
         * @param string $text
         * @param Title $title
         * @param ParserOptions $options
         * @param int|null $revid
+        * @param bool|PPFrame $frame
         * @return mixed|string
         */
-       function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null ) {
+       function preprocess( $text, Title $title = null, ParserOptions $options, $revid = null, $frame = false ) {
                wfProfileIn( __METHOD__ );
                $magicScopeVariable = $this->lock();
                $this->startParse( $title, $options, self::OT_PREPROCESS, true );
@@ -733,7 +634,7 @@ class Parser {
                }
                wfRunHooks( 'ParserBeforeStrip', array( &$this, &$text, &$this->mStripState ) );
                wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, &$this->mStripState ) );
-               $text = $this->replaceVariables( $text );
+               $text = $this->replaceVariables( $text, $frame );
                $text = $this->mStripState->unstripBoth( $text );
                wfProfileOut( __METHOD__ );
                return $text;
@@ -1897,13 +1798,12 @@ class Parser {
        /**
         * make an image if it's allowed, either through the global
         * option, through the exception, or through the on-wiki whitelist
-        * @private
         *
-        * $param string $url
+        * @param string $url
         *
         * @return string
         */
-       function maybeMakeExternalImage( $url ) {
+       private function maybeMakeExternalImage( $url ) {
                $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
                $imagesexception = !empty( $imagesfrom );
                $text = false;
@@ -2428,13 +2328,13 @@ class Parser {
                $result = $this->closeParagraph();
 
                if ( '*' === $char ) {
-                       $result .= "<ul>\n<li>";
+                       $result .= "<ul><li>";
                } elseif ( '#' === $char ) {
-                       $result .= "<ol>\n<li>";
+                       $result .= "<ol><li>";
                } elseif ( ':' === $char ) {
-                       $result .= "<dl>\n<dd>";
+                       $result .= "<dl><dd>";
                } elseif ( ';' === $char ) {
-                       $result .= "<dl>\n<dt>";
+                       $result .= "<dl><dt>";
                        $this->mDTopen = true;
                } else {
                        $result = '<!-- ERR 1 -->';
@@ -2478,20 +2378,20 @@ class Parser {
         */
        function closeList( $char ) {
                if ( '*' === $char ) {
-                       $text = "</li>\n</ul>";
+                       $text = "</li></ul>";
                } elseif ( '#' === $char ) {
-                       $text = "</li>\n</ol>";
+                       $text = "</li></ol>";
                } elseif ( ':' === $char ) {
                        if ( $this->mDTopen ) {
                                $this->mDTopen = false;
-                               $text = "</dt>\n</dl>";
+                               $text = "</dt></dl>";
                        } else {
-                               $text = "</dd>\n</dl>";
+                               $text = "</dd></dl>";
                        }
                } else {
                        return '<!-- ERR 3 -->';
                }
-               return $text . "\n";
+               return $text; # no trailing "\n" here: the block-level loop now appends it once the outermost list is closed
        }
        /**#@-*/
 
@@ -2589,6 +2489,9 @@ class Parser {
                                }
 
                                # Open prefixes where appropriate.
+                               if (  $lastPrefix && $prefixLength > $commonPrefixLength ) {
+                                       $output .= "\n";
+                               }
                                while ( $prefixLength > $commonPrefixLength ) {
                                        $char = substr( $prefix, $commonPrefixLength, 1 );
                                        $output .= $this->openList( $char );
@@ -2602,6 +2505,9 @@ class Parser {
                                        }
                                        ++$commonPrefixLength;
                                }
+                               if ( !$prefixLength && $lastPrefix ) {
+                                       $output .= "\n";
+                               }
                                $lastPrefix = $prefix2;
                        }
 
@@ -2683,12 +2589,18 @@ class Parser {
                                $this->mInPre = false;
                        }
                        if ( $paragraphStack === false ) {
-                               $output .= $t . "\n";
+                               $output .= $t;
+                               if ( $prefixLength === 0 ) {
+                                       $output .= "\n";
+                               }
                        }
                }
                while ( $prefixLength ) {
                        $output .= $this->closeList( $prefix2[$prefixLength - 1] );
                        --$prefixLength;
+                       if ( !$prefixLength ) {
+                               $output .= "\n";
+                       }
                }
                if ( $this->mLastSection != '' ) {
                        $output .= '</' . $this->mLastSection . '>';
@@ -3298,15 +3210,16 @@ class Parser {
         *  self::OT_HTML: all templates and extension tags
         *
         * @param string $text The text to transform
-        * @param PPFrame $frame Object describing the arguments passed to the template.
-        *        Arguments may also be provided as an associative array, as was the usual case before MW1.12.
-        *        Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly.
-        * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion
-        * @private
-        *
+        * @param bool|PPFrame $frame Object describing the arguments passed to the
+        *   template. Arguments may also be provided as an associative array, as
+        *   was the usual case before MW1.12. Providing arguments this way may be
+        *   useful for extensions wishing to perform variable replacement
+        *   explicitly.
+        * @param bool $argsOnly Only do argument (triple-brace) expansion, not
+        *   double-brace expansion.
         * @return string
         */
-       function replaceVariables( $text, $frame = false, $argsOnly = false ) {
+       public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
                # Is there any text? Also, Prevent too big inclusions!
                if ( strlen( $text ) < 1 || strlen( $text ) > $this->mOptions->getMaxIncludeSize() ) {
                        return $text;
@@ -3401,9 +3314,8 @@ class Parser {
         * @param PPFrame $frame The current frame, contains template arguments
         * @throws Exception
         * @return string The text of the template
-        * @private
         */
-       function braceSubstitution( $piece, $frame ) {
+       public function braceSubstitution( $piece, $frame ) {
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-setup' );
 
@@ -3663,12 +3575,7 @@ class Parser {
                                $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
                        } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
                                # Expansion is eligible for the empty-frame cache
-                               if ( isset( $this->mTplExpandCache[$titleText] ) ) {
-                                       $text = $this->mTplExpandCache[$titleText];
-                               } else {
-                                       $text = $newFrame->expand( $text );
-                                       $this->mTplExpandCache[$titleText] = $text;
-                               }
+                               $text = $newFrame->cachedExpand( $titleText, $text );
                        } else {
                                # Uncached expansion
                                $text = $newFrame->expand( $text );
@@ -6401,4 +6308,25 @@ class Parser {
 
                return $recursiveCheck;
        }
+
+       /**
+        * Strip the outer <p></p> wrapper from the HTML source of a single paragraph.
+        *
+        * The input is returned unchanged unless it is exactly one attribute-less <p>...</p> element,
+        * optionally followed by a trailing newline, whose content holds no further </p> closing tag.
+        *
+        * @param string $html HTML source to unwrap
+        * @return string Content of the paragraph (minus one newline directly before </p>), or $html as given
+        * @since 1.24
+        */
+       public static function stripOuterParagraph( $html ) {
+               $m = array();
+               if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
+                       if ( strpos( $m[1], '</p>' ) === false ) { # an inner </p> means more than one paragraph: keep the input
+                               $html = $m[1];
+                       }
+               }
+
+               return $html;
+       }
 }