X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=dc039fd40e32d7184cfa28a40d9f8afd6d2df377;hb=c9097200314fe553d1dc7349360c4c6c3628410a;hp=1f780fb2daf80b00e4595975b7461fcb70048906;hpb=41231616b63c41fcacdc1631330b22c593f27425;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 1f780fb2da..dc039fd40e 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -34,7 +34,7 @@ * Globals used: * objects: $wgLang, $wgContLang * - * NOT $wgArticle, $wgUser or $wgTitle. Keep them away! + * NOT $wgUser or $wgTitle. Keep them away! * * settings: * $wgUseDynamicDates*, $wgInterwikiMagic*, @@ -68,9 +68,11 @@ class Parser { # Constants needed for external link processing # Everything except bracket, space, or control characters - const EXT_LINK_URL_CLASS = '(?:[^\]\[<>"\\x00-\\x20\\x7F]|(?:\[\]))'; - const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+) - \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx'; + # \p{Zs} is unicode 'separator, space' category. 
It covers the space 0x20 + # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052 + const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; + const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F\p{Zs}]+) + \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; # State constants for the definition list colon extraction const COLON_STATE_TEXT = 0; @@ -109,7 +111,18 @@ class Parser { var $mImageParamsMagicArray = array(); var $mMarkerIndex = 0; var $mFirstCall = true; - var $mVariables, $mSubstWords; # Initialised by initialiseVariables() + + # Initialised by initialiseVariables() + + /** + * @var MagicWordArray + */ + var $mVariables; + + /** + * @var MagicWordArray + */ + var $mSubstWords; var $mConf, $mPreprocessor, $mExtLinkBracketedRegex, $mUrlProtocols; # Initialised in constructor # Cleared with clearState(): @@ -125,11 +138,17 @@ class Parser { var $mStripState; var $mIncludeCount, $mArgStack, $mLastSection, $mInPre; - var $mLinkHolders, $mLinkID; + /** + * @var LinkHolderArray + */ + var $mLinkHolders; + + var $mLinkID; var $mIncludeSizes, $mPPNodeCount, $mDefaultSort; var $mTplExpandCache; # empty-frame expansion cache var $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; var $mExpensiveFunctionCount; # number of expensive parser function calls + var $mShowToc, $mForceTocPosition; /** * @var User @@ -153,19 +172,29 @@ class Parser { var $mRevisionObject; # The revision object of the specified revision ID var $mRevisionId; # ID to display in {{REVISIONID}} tags var $mRevisionTimestamp; # The timestamp of the specified revision ID - var $mRevisionUser; # Userto display in {{REVISIONUSER}} tag + var $mRevisionUser; # User to display in {{REVISIONUSER}} tag var $mRevIdForTs; # The revision ID which was used to fetch the timestamp + /** + * @var string + */ + var $mUniqPrefix; + /** * Constructor + * + * @param $conf array */ public function __construct( $conf = array() ) { $this->mConf = $conf; 
$this->mUrlProtocols = wfUrlProtocols(); - $this->mExtLinkBracketedRegex = '/\[(\b(' . wfUrlProtocols() . ')'. - '(?:[^\]\[<>"\x00-\x20\x7F]|\[\])+) *([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/S'; + $this->mExtLinkBracketedRegex = '/\[((' . wfUrlProtocols() . ')'. + self::EXT_LINK_URL_CLASS.'+)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su'; if ( isset( $conf['preprocessorClass'] ) ) { $this->mPreprocessorClass = $conf['preprocessorClass']; + } elseif ( defined( 'MW_COMPILED' ) ) { + # Preprocessor_Hash is much faster than Preprocessor_DOM in compiled mode + $this->mPreprocessorClass = 'Preprocessor_Hash'; } elseif ( extension_loaded( 'domxml' ) ) { # PECL extension that conflicts with the core DOM extension (bug 13770) wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" ); @@ -175,6 +204,7 @@ class Parser { } else { $this->mPreprocessorClass = 'Preprocessor_Hash'; } + wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" ); } /** @@ -320,7 +350,7 @@ class Parser { $fixtags = array( # french spaces, last one Guillemet-left # only if there is something before the space - '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2', + '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ', # french spaces, Guillemet-right '/(\\302\\253) /' => '\\1 ', '/ (!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874. @@ -332,14 +362,16 @@ class Parser { $this->replaceLinkHolders( $text ); /** - * The page doesn't get language converted if + * The input doesn't get language converted if * a) It's disabled * b) Content isn't converted * c) It's a conversion table + * d) it is an interface message (which is in the user language) */ if ( !( $wgDisableLangConversion || isset( $this->mDoubleUnderscores['nocontentconvert'] ) - || $this->mTitle->isConversionTable() ) ) { + || $this->mTitle->isConversionTable() + || $this->mOptions->getInterfaceMessage() ) ) { # The position of the convert() call should not be changed. 
it # assumes that the links are all replaced and the only thing left @@ -448,6 +480,8 @@ class Parser { * * @param $text String: text extension wants to have parsed * @param $frame PPFrame: The frame to use for expanding any template variables + * + * @return string */ function recursiveTagParse( $text, $frame=false ) { wfProfileIn( __METHOD__ ); @@ -476,6 +510,22 @@ class Parser { return $text; } + /** + * Recursive parser entry point that can be called from an extension tag + * hook. + * + * @param $text String: text to be expanded + * @param $frame PPFrame: The frame to use for expanding any template variables + * @return String + */ + public function recursivePreprocess( $text, $frame = false ) { + wfProfileIn( __METHOD__ ); + $text = $this->replaceVariables( $text, $frame ); + $text = $this->mStripState->unstripBoth( $text ); + wfProfileOut( __METHOD__ ); + return $text; + } + /** * Process the wikitext for the ?preload= feature. (bug 5210) * @@ -495,6 +545,8 @@ class Parser { /** * Get a random string + * + * @return string */ static public function getRandomString() { return dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) ); @@ -517,7 +569,7 @@ class Parser { */ public function uniqPrefix() { if ( !isset( $this->mUniqPrefix ) ) { - # @todo Fixme: this is probably *horribly wrong* + # @todo FIXME: This is probably *horribly wrong* # LanguageConverter seems to want $wgParser's uniqPrefix, however # if this is called for a parser cache hit, the parser may not # have ever been initialized in the first place. 
@@ -530,6 +582,8 @@ class Parser { /** * Set the context title + * + * @param $t Title */ function setTitle( $t ) { if ( !$t || $t instanceof FakeTitle ) { @@ -618,10 +672,16 @@ class Parser { return wfSetVar( $this->mOptions, $x ); } + /** + * @return int + */ function nextLinkID() { return $this->mLinkID++; } + /** + * @param $id int + */ function setLinkID( $id ) { $this->mLinkID = $id; } @@ -630,13 +690,13 @@ class Parser { * @return Language */ function getFunctionLang() { - global $wgLang, $wgContLang; + global $wgLang; $target = $this->mOptions->getTargetLanguage(); if ( $target !== null ) { return $target; } else { - return $this->mOptions->getInterfaceMessage() ? $wgLang : $wgContLang; + return $this->mOptions->getInterfaceMessage() ? $wgLang : $this->mTitle->getPageLanguage(); } } @@ -677,10 +737,10 @@ class Parser { * array( 'param' => 'x' ), * 'tag content' ) ) * - * @param $elements list of element names. Comments are always extracted. - * @param $text Source text string. - * @param $matches Out parameter, Array: extracted tags - * @param $uniq_prefix + * @param $elements array list of element names. Comments are always extracted. + * @param $text string Source text string. + * @param $matches array Out parameter, Array: extracted tags + * @param $uniq_prefix string * @return String: stripped text */ public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = '' ) { @@ -747,6 +807,8 @@ class Parser { /** * Get a list of strippable XML-like elements + * + * @return array */ function getStripList() { return $this->mStripList; @@ -757,7 +819,9 @@ class Parser { * Returns the unique tag which must be inserted into the stripped text * The tag will be replaced with the original text in unstrip() * - * @private + * @param $text string + * + * @return string */ function insertStripItem( $text ) { $rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . 
self::MARKER_SUFFIX; @@ -770,6 +834,10 @@ class Parser { * parse the wiki syntax used to render tables * * @private + * + * @param $text string + * + * @return string */ function doTableStuff( $text ) { wfProfileIn( __METHOD__ ); @@ -783,7 +851,7 @@ class Parser { # empty line, go to next line, # but only append \n if outside of table - if ( $line === '') { + if ( $line === '') { $output .= $outLine . "\n"; continue; } @@ -806,11 +874,11 @@ class Parser { if ( $attributes !== '' ) { $table['attributes'] = $attributes; } - } else if ( !isset( $tables[0] ) ) { + } elseif ( !isset( $tables[0] ) ) { // we're outside the table $out .= $outLine . "\n"; - } else if ( $firstChars === '|}' ) { + } elseif ( $firstChars === '|}' ) { // trim the |} code from the line $line = substr ( $line , 2 ); @@ -851,7 +919,7 @@ class Parser { $output .= $o; - } else if ( $firstChars === '|-' ) { + } elseif ( $firstChars === '|-' ) { // start a new row element // but only when we haven't started one already if ( count( $currentRow ) != 0 ) { @@ -866,11 +934,11 @@ class Parser { $currentRow['attributes'] = $attributes; } - } else if ( $firstChars === '|+' ) { + } elseif ( $firstChars === '|+' ) { // a table caption, but only proceed if there isn't one already if ( !isset ( $table['caption'] ) ) { $line = substr ( $line , 2 ); - + $c = $this->getCellAttr( $line , 'caption' ); $table['caption'] = array(); $table['caption']['content'] = $c[0]; @@ -878,7 +946,7 @@ class Parser { unset( $c ); $output =& $table['caption']['content']; } - } else if ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { + } elseif ( $firstChars === '|' || $firstChars === '!' || $firstChars === '!+' ) { // Which kind of cells are we dealing with $currentTag = 'td'; $line = substr ( $line , 1 ); @@ -895,7 +963,7 @@ class Parser { // decide whether thead to tbody if ( !array_key_exists( 'type', $currentRow ) ) { $currentRow['type'] = ( $firstChars === '!' ) ? 
'thead' : 'tbody' ; - } else if ( $firstChars === '|' ) { + } elseif ( $firstChars === '|' ) { $currentRow['type'] = 'tbody'; } @@ -944,13 +1012,15 @@ class Parser { /** * Helper function for doTableStuff() separating the contents of cells from - * attributes. Particularly useful as there's a possible bug and this action + * attributes. Particularly useful as there's a possible bug and this action * is repeated twice. * * @private + * @param $cell + * @param $tagName + * @return array */ function getCellAttr ( $cell, $tagName ) { - $content = null; $attributes = null; $cell = trim ( $cell ); @@ -963,10 +1033,9 @@ class Parser { if ( strpos( $cellData[0], '[[' ) !== false ) { $content = trim ( $cell ); } - else if ( count ( $cellData ) == 1 ) { + elseif ( count ( $cellData ) == 1 ) { $content = trim ( $cellData[0] ); - } - else { + } else { $attributes = $this->mStripState->unstripBoth( $cellData[0] ); $attributes = Sanitizer::fixTagAttributes( $attributes , $tagName ); @@ -980,10 +1049,13 @@ class Parser { * Helper function for doTableStuff(). This converts the structured array into html. * * @private + * + * @param $table array + * + * @return string */ - function generateTableHTML ( &$table ) { - $return = ""; - $return .= str_repeat( '
' , $table['indent'] ); + function generateTableHTML( &$table ) { + $return = str_repeat( '
' , $table['indent'] ); $return .= ''; } @@ -1067,6 +1137,8 @@ class Parser { * no numric elements and an array itself if not previously defined. * * @private + * + * @param $arr array */ function &last ( &$arr ) { for ( $i = count( $arr ); ( !isset( $arr[$i] ) && $i > 0 ); $i-- ) { } @@ -1078,8 +1150,14 @@ class Parser { * HTML. Only called for $mOutputType == self::OT_HTML. * * @private + * + * @param $text string + * @param $isMain bool + * @param $frame bool + * + * @return string */ - function internalParse( $text, $isMain = true, $frame=false ) { + function internalParse( $text, $isMain = true, $frame = false ) { wfProfileIn( __METHOD__ ); $origText = $text; @@ -1145,10 +1223,14 @@ class Parser { * * DML * @private + * + * @param $text string + * + * @return string */ function doMagicLinks( $text ) { wfProfileIn( __METHOD__ ); - $prots = $this->mUrlProtocols; + $prots = wfUrlProtocolsWithoutProtRel(); $urlChar = self::EXT_LINK_URL_CLASS; $text = preg_replace_callback( '!(?: # Start cases @@ -1161,11 +1243,16 @@ class Parser { (?: [0-9] [\ \-]? ){9} # 9 digits with opt. 
delimiters [0-9Xx] # check digit \b) - )!x', array( &$this, 'magicLinkCallback' ), $text ); + )!xu', array( &$this, 'magicLinkCallback' ), $text ); wfProfileOut( __METHOD__ ); return $text; } + /** + * @throws MWException + * @param $m array + * @return HTML|string + */ function magicLinkCallback( $m ) { if ( isset( $m[1] ) && $m[1] !== '' ) { # Skip anchor @@ -1213,7 +1300,10 @@ class Parser { /** * Make a free external link, given a user-supplied URL - * @return HTML + * + * @param $url string + * + * @return string HTML * @private */ function makeFreeExternalLink( $url ) { @@ -1266,6 +1356,10 @@ class Parser { * Parse headers and return html * * @private + * + * @param $text string + * + * @return string */ function doHeadings( $text ) { wfProfileIn( __METHOD__ ); @@ -1281,6 +1375,9 @@ class Parser { /** * Replace single quotes with HTML markup * @private + * + * @param $text string + * * @return string the altered text */ function doAllQuotes( $text ) { @@ -1297,6 +1394,10 @@ class Parser { /** * Helper function for doAllQuotes() + * + * @param $text string + * + * @return string */ public function doQuotes( $text ) { $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE ); @@ -1461,6 +1562,10 @@ class Parser { * Make sure to run maintenance/parserTests.php if you change this code. * * @private + * + * @param $text string + * + * @return string */ function replaceExternalLinks( $text ) { global $wgContLang; @@ -1499,16 +1604,10 @@ class Parser { # No link text, e.g. [http://domain.tld/some.link] if ( $text == '' ) { - # Autonumber if allowed. See bug #5918 - if ( strpos( wfUrlProtocols(), substr( $protocol, 0, strpos( $protocol, ':' ) ) ) !== false ) { - $langObj = $this->getFunctionLang(); - $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . 
']'; - $linktype = 'autonumber'; - } else { - # Otherwise just use the URL - $text = htmlspecialchars( $url ); - $linktype = 'free'; - } + # Autonumber + $langObj = $this->getFunctionLang(); + $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']'; + $linktype = 'autonumber'; } else { # Have link text, e.g. [http://domain.tld/some.link text]s # Check for trail @@ -1543,9 +1642,9 @@ class Parser { * (depending on configuration, namespace, and the URL's domain) and/or a * target attribute (depending on configuration). * - * @param $url String: optional URL, to extract the domain from for rel => + * @param $url String|bool optional URL, to extract the domain from for rel => * nofollow if appropriate - * @return Array: associative array of HTML attributes + * @return Array associative array of HTML attributes */ function getExternalLinkAttribs( $url = false ) { $attribs = array(); @@ -1592,6 +1691,10 @@ class Parser { /** * Callback function used in replaceUnusualEscapes(). * Replaces unusual URL escape codes with their equivalent character + * + * @param $matches array + * + * @return string */ private static function replaceUnusualEscapesCallback( $matches ) { $char = urldecode( $matches[0] ); @@ -1610,6 +1713,10 @@ class Parser { * make an image if it's allowed, either through the global * option, through the exception, or through the on-wiki whitelist * @private + * + * $param $url string + * + * @return string */ function maybeMakeExternalImage( $url ) { $imagesfrom = $this->mOptions->getAllowExternalImagesFrom(); @@ -1656,6 +1763,9 @@ class Parser { /** * Process [[ ]] wikilinks + * + * @param $s string + * * @return String: processed text * * @private @@ -1802,7 +1912,7 @@ class Parser { # Don't allow internal links to pages containing # PROTO: where PROTO is a valid URL protocol; these # should be external links. - if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $m[1] ) ) { + if ( preg_match( '/^(?:' . wfUrlProtocols() . 
')/', $m[1] ) ) { $s .= $prefix . '[[' . $line ; wfProfileOut( __METHOD__."-misc" ); continue; @@ -1964,7 +2074,7 @@ class Parser { } # NS_MEDIA is a pseudo-namespace for linking directly to a file - # FIXME: Should do batch file existence checks, see comment below + # @todo FIXME: Should do batch file existence checks, see comment below if ( $ns == NS_MEDIA ) { wfProfileIn( __METHOD__."-media" ); # Give extensions a chance to select the file revision for us @@ -1984,7 +2094,7 @@ class Parser { # Some titles, such as valid special pages or files in foreign repos, should # be shown as bluelinks even though they're not included in the page table # - # FIXME: isAlwaysKnown() can be expensive for file links; we should really do + # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do # batch file existence checks for NS_FILE and NS_MEDIA if ( $iw == '' && $nt->isAlwaysKnown() ) { $this->mOutput->addLink( $nt ); @@ -2084,6 +2194,11 @@ class Parser { * getCommon() returns the length of the longest common substring * of both arguments, starting at the beginning of both. * @private + * + * @param $st1 string + * @param $st2 string + * + * @return int */ function getCommon( $st1, $st2 ) { $fl = strlen( $st1 ); @@ -2105,6 +2220,8 @@ class Parser { * element appropriate to the prefix character passed into them. * @private * + * @param $char char + * * @return string */ function openList( $char ) { @@ -2276,7 +2393,7 @@ class Parser { $output .= $this->openList( $char ); if ( ';' === $char ) { - # FIXME: This is dupe of code above + # @todo FIXME: This is dupe of code above if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) { $t = $t2; $output .= $term . $this->nextItem( ':' ); @@ -2368,10 +2485,10 @@ class Parser { * Split up a string on ':', ignoring any occurences inside tags * to prevent illegal overlapping. 
* - * @param $str String: the string to split - * @param &$before String: set to everything before the ':' - * @param &$after String: set to everything after the ':' - * return String: the position of the ':', or false if none found + * @param $str String the string to split + * @param &$before String set to everything before the ':' + * @param &$after String set to everything after the ':' + * @return String the position of the ':', or false if none found */ function findColonNoLinks( $str, &$before, &$after ) { wfProfileIn( __METHOD__ ); @@ -2536,11 +2653,22 @@ class Parser { * * @param $index integer * @param $frame PPFrame + * + * @return string */ - function getVariableValue( $index, $frame=false ) { + function getVariableValue( $index, $frame = false ) { global $wgContLang, $wgSitename, $wgServer; global $wgArticlePath, $wgScriptPath, $wgStylePath; + if ( is_null( $this->mTitle ) ) { + // If no title set, bad things are going to happen + // later. Title should always be set since this + // should only be called in the middle of a parse + // operation (but the unit-tests do funky stuff) + throw new MWException( __METHOD__ . ' Should only be ' + . ' called while parsing (no title set)' ); + } + /** * Some of these require message or data lookups and can be * expensive to check many times. 
@@ -2575,48 +2703,50 @@ class Parser { date_default_timezone_set( $oldtz ); } + $pageLang = $this->getFunctionLang(); + switch ( $index ) { case 'currentmonth': - $value = $wgContLang->formatNum( gmdate( 'm', $ts ) ); + $value = $pageLang->formatNum( gmdate( 'm', $ts ) ); break; case 'currentmonth1': - $value = $wgContLang->formatNum( gmdate( 'n', $ts ) ); + $value = $pageLang->formatNum( gmdate( 'n', $ts ) ); break; case 'currentmonthname': - $value = $wgContLang->getMonthName( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthName( gmdate( 'n', $ts ) ); break; case 'currentmonthnamegen': - $value = $wgContLang->getMonthNameGen( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthNameGen( gmdate( 'n', $ts ) ); break; case 'currentmonthabbrev': - $value = $wgContLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); + $value = $pageLang->getMonthAbbreviation( gmdate( 'n', $ts ) ); break; case 'currentday': - $value = $wgContLang->formatNum( gmdate( 'j', $ts ) ); + $value = $pageLang->formatNum( gmdate( 'j', $ts ) ); break; case 'currentday2': - $value = $wgContLang->formatNum( gmdate( 'd', $ts ) ); + $value = $pageLang->formatNum( gmdate( 'd', $ts ) ); break; case 'localmonth': - $value = $wgContLang->formatNum( $localMonth ); + $value = $pageLang->formatNum( $localMonth ); break; case 'localmonth1': - $value = $wgContLang->formatNum( $localMonth1 ); + $value = $pageLang->formatNum( $localMonth1 ); break; case 'localmonthname': - $value = $wgContLang->getMonthName( $localMonthName ); + $value = $pageLang->getMonthName( $localMonthName ); break; case 'localmonthnamegen': - $value = $wgContLang->getMonthNameGen( $localMonthName ); + $value = $pageLang->getMonthNameGen( $localMonthName ); break; case 'localmonthabbrev': - $value = $wgContLang->getMonthAbbreviation( $localMonthName ); + $value = $pageLang->getMonthAbbreviation( $localMonthName ); break; case 'localday': - $value = $wgContLang->formatNum( $localDay ); + $value = $pageLang->formatNum( $localDay ); break; 
case 'localday2': - $value = $wgContLang->formatNum( $localDay2 ); + $value = $pageLang->formatNum( $localDay2 ); break; case 'pagename': $value = wfEscapeWikiText( $this->mTitle->getText() ); @@ -2741,68 +2871,68 @@ class Parser { $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) ); break; case 'currentdayname': - $value = $wgContLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); + $value = $pageLang->getWeekdayName( gmdate( 'w', $ts ) + 1 ); break; case 'currentyear': - $value = $wgContLang->formatNum( gmdate( 'Y', $ts ), true ); + $value = $pageLang->formatNum( gmdate( 'Y', $ts ), true ); break; case 'currenttime': - $value = $wgContLang->time( wfTimestamp( TS_MW, $ts ), false, false ); + $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false ); break; case 'currenthour': - $value = $wgContLang->formatNum( gmdate( 'H', $ts ), true ); + $value = $pageLang->formatNum( gmdate( 'H', $ts ), true ); break; case 'currentweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = $wgContLang->formatNum( (int)gmdate( 'W', $ts ) ); + $value = $pageLang->formatNum( (int)gmdate( 'W', $ts ) ); break; case 'currentdow': - $value = $wgContLang->formatNum( gmdate( 'w', $ts ) ); + $value = $pageLang->formatNum( gmdate( 'w', $ts ) ); break; case 'localdayname': - $value = $wgContLang->getWeekdayName( $localDayOfWeek + 1 ); + $value = $pageLang->getWeekdayName( $localDayOfWeek + 1 ); break; case 'localyear': - $value = $wgContLang->formatNum( $localYear, true ); + $value = $pageLang->formatNum( $localYear, true ); break; case 'localtime': - $value = $wgContLang->time( $localTimestamp, false, false ); + $value = $pageLang->time( $localTimestamp, false, false ); break; case 'localhour': - $value = $wgContLang->formatNum( $localHour, true ); + $value = $pageLang->formatNum( $localHour, true ); break; case 'localweek': # @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to # int to remove the padding - $value = 
$wgContLang->formatNum( (int)$localWeek ); + $value = $pageLang->formatNum( (int)$localWeek ); break; case 'localdow': - $value = $wgContLang->formatNum( $localDayOfWeek ); + $value = $pageLang->formatNum( $localDayOfWeek ); break; case 'numberofarticles': - $value = $wgContLang->formatNum( SiteStats::articles() ); + $value = $pageLang->formatNum( SiteStats::articles() ); break; case 'numberoffiles': - $value = $wgContLang->formatNum( SiteStats::images() ); + $value = $pageLang->formatNum( SiteStats::images() ); break; case 'numberofusers': - $value = $wgContLang->formatNum( SiteStats::users() ); + $value = $pageLang->formatNum( SiteStats::users() ); break; case 'numberofactiveusers': - $value = $wgContLang->formatNum( SiteStats::activeUsers() ); + $value = $pageLang->formatNum( SiteStats::activeUsers() ); break; case 'numberofpages': - $value = $wgContLang->formatNum( SiteStats::pages() ); + $value = $pageLang->formatNum( SiteStats::pages() ); break; case 'numberofadmins': - $value = $wgContLang->formatNum( SiteStats::numberingroup( 'sysop' ) ); + $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) ); break; case 'numberofedits': - $value = $wgContLang->formatNum( SiteStats::edits() ); + $value = $pageLang->formatNum( SiteStats::edits() ); break; case 'numberofviews': - $value = $wgContLang->formatNum( SiteStats::views() ); + $value = $pageLang->formatNum( SiteStats::views() ); break; case 'currenttimestamp': $value = wfTimestamp( TS_MW, $ts ); @@ -2829,7 +2959,7 @@ class Parser { case 'stylepath': return $wgStylePath; case 'directionmark': - return $wgContLang->getDirMark(); + return $pageLang->getDirMark(); case 'contentlanguage': global $wgLanguageCode; return $wgLanguageCode; @@ -2842,8 +2972,9 @@ class Parser { } } - if ( $index ) + if ( $index ) { $this->mVarCache[$index] = $value; + } return $value; } @@ -2895,6 +3026,8 @@ class Parser { /** * Return a three-element array: leading whitespace, string contents, trailing whitespace * + * @param 
$s string + * * @return array */ public static function splitWhitespace( $s ) { @@ -2920,11 +3053,11 @@ class Parser { * self::OT_PREPROCESS: templates but not extension tags * self::OT_HTML: all templates and extension tags * - * @param $text String: the text to transform + * @param $text String the text to transform * @param $frame PPFrame Object describing the arguments passed to the template. * Arguments may also be provided as an associative array, as was the usual case before MW1.12. * Providing arguments this way may be useful for extensions wishing to perform variable replacement explicitly. - * @param $argsOnly Boolean: only do argument (triple-brace) expansion, not double-brace expansion + * @param $argsOnly Boolean only do argument (triple-brace) expansion, not double-brace expansion * @private * * @return string @@ -2954,6 +3087,8 @@ class Parser { /** * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too. * + * @param $args array + * * @return array */ static function createAssocArgs( $args ) { @@ -3041,9 +3176,10 @@ class Parser { $originalTitle = $part1; # $args is a list of argument nodes, starting from index 0, not including $part1 - # *** FIXME if piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object + # @todo FIXME: If piece['parts'] is null then the call to getLength() below won't work b/c this $args isn't an object $args = ( null == $piece['parts'] ) ? 
array() : $piece['parts']; wfProfileOut( __METHOD__.'-setup' ); + wfProfileIn( __METHOD__."-title-$originalTitle" ); # SUBST wfProfileIn( __METHOD__.'-modifiers' ); @@ -3212,8 +3348,24 @@ class Parser { && $this->mOptions->getAllowSpecialInclusion() && $this->ot['html'] ) { - $text = SpecialPageFactory::capturePath( $title ); - if ( is_string( $text ) ) { + $pageArgs = array(); + for ( $i = 0; $i < $args->getLength(); $i++ ) { + $bits = $args->item( $i )->splitArg(); + if ( strval( $bits['index'] ) === '' ) { + $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); + $value = trim( $frame->expand( $bits['value'] ) ); + $pageArgs[$name] = $value; + } + } + $context = new RequestContext; + $context->setTitle( $title ); + $context->setRequest( new FauxRequest( $pageArgs ) ); + $context->setUser( $this->getUser() ); + $context->setLang( Language::factory( $this->mOptions->getUserLang() ) ); + $ret = SpecialPageFactory::capturePath( $title, $context ); + if ( $ret ) { + $text = $context->getOutput()->getHTML(); + $this->mOutput->addOutputPageMetadata( $context->getOutput() ); $found = true; $isHTML = true; $this->disableCache(); @@ -3262,6 +3414,7 @@ class Parser { # Recover the source wikitext and return it if ( !$found ) { $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return array( 'object' => $text ); } @@ -3330,6 +3483,7 @@ class Parser { $ret = array( 'text' => $text ); } + wfProfileOut( __METHOD__."-title-$originalTitle" ); wfProfileOut( __METHOD__ ); return $ret; } @@ -3338,6 +3492,8 @@ class Parser { * Get the semi-parsed DOM representation of a template with a given title, * and its redirect destination title. Cached. 
* + * @param $title Title + * * @return array */ function getTemplateDom( $title ) { @@ -3404,6 +3560,9 @@ class Parser { * Static function to get a template * Can be overridden via ParserOptions::setTemplateCallback(). * + * @parma $title Title + * @param $parser Parser + * * @return array */ static function statelessFetchTemplate( $title, $parser = false ) { @@ -3481,7 +3640,7 @@ class Parser { * @param Title $title * @param string $time MW timestamp * @param string $sha1 base 36 SHA-1 - * @return mixed File or false + * @return File|false */ function fetchFile( $title, $time = false, $sha1 = false ) { $res = $this->fetchFileAndTitle( $title, $time, $sha1 ); @@ -3508,8 +3667,14 @@ class Parser { # Register the file as a dependency... $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); if ( $file && !$title->equals( $file->getTitle() ) ) { - # Update fetched file title + # Update fetched file title $title = $file->getTitle(); + if ( is_null( $file->getRedirectedTitle() ) ) { + # This file was not a redirect, but the title does not match. + # Register under the new name because otherwise the link will + # get lost. + $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 ); + } } return array( $file, $title ); } @@ -3517,6 +3682,9 @@ class Parser { /** * Transclude an interwiki link. * + * @param $title Title + * @param $action + * * @return string */ function interwikiTransclude( $title, $action ) { @@ -3735,6 +3903,10 @@ class Parser { /** * Strip double-underscore items like __NOGALLERY__ and __NOTOC__ * Fills $this->mDoubleUnderscores, returns the modified text + * + * @param $text string + * + * @return string */ function doDoubleUnderscore( $text ) { wfProfileIn( __METHOD__ ); @@ -3767,7 +3939,7 @@ class Parser { } # (bug 8068) Allow control over whether robots index a page. # - # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This + # @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! 
This # is not desirable, the last one on the page should win. if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) { $this->mOutput->setIndexPolicy( 'noindex' ); @@ -3828,7 +4000,7 @@ class Parser { * @private */ function formatHeadings( $text, $origText, $isMain=true ) { - global $wgMaxTocLevel, $wgContLang, $wgHtml5, $wgExperimentalHtmlIds; + global $wgMaxTocLevel, $wgHtml5, $wgExperimentalHtmlIds; # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { @@ -3965,7 +4137,7 @@ class Parser { if ( $dot ) { $numbering .= '.'; } - $numbering .= $wgContLang->formatNum( $sublevelCount[$i] ); + $numbering .= $this->getFunctionLang()->formatNum( $sublevelCount[$i] ); $dot = 1; } } @@ -4023,7 +4195,7 @@ class Parser { # HTML names must be case-insensitively unique (bug 10721). # This does not apply to Unicode characters per # http://dev.w3.org/html5/spec/infrastructure.html#case-sensitivity-and-string-comparison - # FIXME: We may be changing them depending on the current locale. + # @todo FIXME: We may be changing them depending on the current locale. $arrayKey = strtolower( $safeHeadline ); if ( $legacyHeadline === false ) { $legacyArrayKey = false; @@ -4141,30 +4313,42 @@ class Parser { } # split up and insert constructed headlines - $blocks = preg_split( '/.*?<\/H[1-6]>/i', $text ); $i = 0; + // build an array of document sections + $sections = array(); foreach ( $blocks as $block ) { - if ( $showEditLink && $headlineCount > 0 && $i == 0 && $block !== "\n" ) { - # This is the [edit] link that appears for the top block of text when - # section editing is enabled - - # Disabled because it broke block formatting - # For example, a bullet point in the top line - # $full .= $sk->editSectionLink(0); - } - $full .= $block; - if ( $enoughToc && !$i && $isMain && !$this->mForceTocPosition ) { - # Top anchor now in skin - $full = $full.$toc; + // $head is zero-based, sections aren't. 
+ if ( empty( $head[$i - 1] ) ) { + $sections[$i] = $block; + } else { + $sections[$i] = $head[$i - 1] . $block; } - if ( !empty( $head[$i] ) ) { - $full .= $head[$i]; - } + /** + * Send a hook, one per section. + * The idea here is to be able to make section-level DIVs, but to do so in a + * lower-impact, more correct way than r50769 + * + * $this : caller + * $section : the section number + * &$sectionContent : ref to the content of the section + * $showEditLinks : boolean describing whether this section has an edit link + */ + wfRunHooks( 'ParserSectionCreate', array( $this, $i, &$sections[$i], $showEditLink ) ); + $i++; } + + if ( $enoughToc && $isMain && !$this->mForceTocPosition ) { + // append the TOC at the beginning + // Top anchor now in skin + $sections[0] = $sections[0] . $toc . "\n"; + } + + $full .= join( '', $sections ); + if ( $this->mForceTocPosition ) { return str_replace( '<!--MWTOC-->', $toc, $full ); } else { @@ -4204,6 +4388,11 @@ class Parser { /** * Pre-save transform helper function * @private + * + * @param $text string + * @param $user User + * + * @return string */ function pstPass2( $text, $user ) { global $wgContLang, $wgLocaltimezone; @@ -4259,9 +4448,9 @@ class Parser { $tc = "[$wgLegalTitleChars]"; $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
- $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\))\\|]]/"; # [[ns:page (context)|]] - $p4 = "/\[\[(:?$nc+:|:|)($tc+?)(($tc+))\\|]]/"; # [[ns:page(context)|]] - $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( \\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] + $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/"; # [[ns:page (context)|]] + $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?($tc+))\\|]]/"; # [[ns:page(context)|]] + $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)(, $tc+|)\\|]]/"; # [[ns:page (context), context|]] $p2 = "/\[\[\\|($tc+)]]/"; # [[|page]] # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]" @@ -4295,8 +4484,8 @@ class Parser { * as it may have changed if it's the $wgParser. * * @param $user User - * @param $nickname String: nickname to use or false to use user's default nickname - * @param $fancySig Boolean: whether the nicknname is the complete signature + * @param $nickname String|bool nickname to use or false to use user's default nickname + * @param $fancySig Boolean|null whether the nickname is the complete signature * or null to use default value * @return string */ @@ -4336,11 +4525,9 @@ class Parser { # If we're still here, make it a link to the user page $userText = wfEscapeWikiText( $username ); $nickText = wfEscapeWikiText( $nickname ); - if ( $user->isAnon() ) { - return wfMsgExt( 'signature-anon', array( 'content', 'parsemag' ), $userText, $nickText ); - } else { - return wfMsgExt( 'signature', array( 'content', 'parsemag' ), $userText, $nickText ); - } + $msgName = $user->isAnon() ?
'signature-anon' : 'signature'; + + return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()->title( $this->getTitle() )->text(); } /** @@ -4360,16 +4547,13 @@ class Parser { * 2) Substitute all transclusions * * @param $text String - * @param $parsing Whether we're cleaning (preferences save) or parsing + * @param $parsing bool Whether we're cleaning (preferences save) or parsing * @return String: signature text */ function cleanSig( $text, $parsing = false ) { if ( !$parsing ) { global $wgTitle; - $this->mOptions = new ParserOptions; - $this->clearState(); - $this->setTitle( $wgTitle ); - $this->setOutputType = self::OT_PREPROCESS; + $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true ); } # Option to disable this feature @@ -4377,7 +4561,7 @@ class Parser { return $text; } - # FIXME: regex doesn't respect extension tags or nowiki + # @todo FIXME: Regex doesn't respect extension tags or nowiki # => Move this logic to braceSubstitution() $substWord = MagicWord::get( 'subst' ); $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . 
$substWord->getRegexCase(); @@ -4410,11 +4594,22 @@ class Parser { /** * Set up some variables which are usually set up in parse() * so that an external function can call some class members with confidence + * + * @param $title Title|null + * @param $options ParserOptions + * @param $outputType + * @param $clearState bool */ public function startExternalParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { $this->startParse( $title, $options, $outputType, $clearState ); } + /** + * @param $title Title|null + * @param $options ParserOptions + * @param $outputType + * @param $clearState bool + */ private function startParse( Title $title = null, ParserOptions $options, $outputType, $clearState = true ) { $this->setTitle( $title ); $this->mOptions = $options; @@ -4526,6 +4721,19 @@ class Parser { $this->mStripList = $this->mDefaultStripList; } + /** + * Remove a specific tag hook. Should not be called on $wgParser. + * Does not change the strip list. + * + * @param string $tag + * @return void + */ + function clearTagHook( $tag ) { + if ( isset( $this->mTagHooks[$tag] ) ) { + unset( $this->mTagHooks[$tag] ); + } + } + /** * Create a function, e.g. {{sum:1|2|3}} * The callback function should have the form: @@ -4629,10 +4837,14 @@ class Parser { } /** - * FIXME: update documentation. makeLinkObj() is deprecated. + * @todo FIXME: Update documentation. makeLinkObj() is deprecated. * Replace link placeholders with actual links, in the buffer * Placeholders created in Skin::makeLinkObj() - * Returns an array of link CSS classes, indexed by PDBK. + * + * @param $text string + * @param $options int + * + * @return array of link CSS classes, indexed by PDBK. */ function replaceLinkHolders( &$text, $options = 0 ) { return $this->mLinkHolders->replace( $text ); @@ -4659,7 +4871,7 @@ class Parser { * 'A tree'. 
* * @param string $text - * @param array $param + * @param array $params * @return string HTML */ function renderImageGallery( $text, $params ) { @@ -4713,14 +4925,14 @@ class Parser { # Bogus title. Ignore these so we don't bomb out later. continue; } - + $label = ''; $alt = ''; if ( isset( $matches[3] ) ) { // look for an |alt= definition while trying not to break existing // captions with multiple pipes (|) in it, until a more sensible grammar // is defined for images in galleries - + $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) ); $altmatches = StringUtils::explode('|', $matches[3]); $magicWordAlt = MagicWord::get( 'img_alt' ); @@ -4744,6 +4956,10 @@ class Parser { return $ig->toHTML(); } + /** + * @param $handler + * @return array + */ function getImageParams( $handler ) { if ( $handler ) { $handlerClass = get_class( $handler ); @@ -4789,7 +5005,7 @@ class Parser { * * @param $title Title * @param $options String - * @param $holders LinkHolderArray + * @param $holders LinkHolderArray|false * @return string HTML */ function makeImage( $title, $options, $holders = false ) { @@ -4879,7 +5095,7 @@ class Parser { switch( $paramName ) { case 'manualthumb': case 'alt': - # @todo Fixme: possibly check validity here for + # @todo FIXME: Possibly check validity here for # manualthumb? downstream behavior seems odd with # missing manual thumbs. $validated = true; @@ -4893,7 +5109,7 @@ class Parser { $value = true; $validated = true; } elseif ( preg_match( "/^$prots/", $value ) ) { - if ( preg_match( "/^($prots)$chars+$/", $value, $m ) ) { + if ( preg_match( "/^($prots)$chars+$/u", $value, $m ) ) { $paramName = 'link-url'; $this->mOutput->addExternalLink( $value ); if ( $this->mOptions->getExternalLinkTarget() ) { @@ -4996,6 +5212,11 @@ class Parser { return $ret; } + /** + * @param $caption + * @param $holders LinkHolderArray + * @return mixed|String + */ protected function stripAltText( $caption, $holders ) { # Strip bad stuff out of the title (tooltip). 
We can't just use # replaceLinkHoldersText() here, because if this function is called @@ -5053,11 +5274,16 @@ class Parser { * * Transparent tag hooks are like regular XML-style tag hooks, except they * operate late in the transformation sequence, on HTML instead of wikitext. + * + * @param $text string + * + * @return string */ function replaceTransparentTags( $text ) { $matches = array(); $elements = array_keys( $this->mTransparentTagHooks ); $text = self::extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix ); + $replacements = array(); foreach ( $matches as $marker => $data ) { list( $element, $content, $params, $tag ) = $data; @@ -5067,9 +5293,9 @@ class Parser { } else { $output = $tag; } - $this->mStripState->addGeneral( $marker, $output ); + $replacements[$marker] = $output; } - return $text; + return strtr( $text, $replacements ); } /** @@ -5096,6 +5322,8 @@ class Parser { * @param $newText String: replacement text for section data. * @return String: for "get", the extracted section text. * for "replace", the whole page with the section replaced. + * If the page is empty and section 0 is requested, $text (as '') + * is returned */ private function extractSections( $text, $section, $mode, $newText='' ) { global $wgTitle; # not generally used but removes an ugly failure mode @@ -5336,6 +5564,10 @@ class Parser { * Try to guess the section anchor name based on a wikitext fragment * presumably extracted from a heading, for example "Header" from * "== Header ==". + * + * @param $text string + * + * @return string */ public function guessSectionNameFromWikiText( $text ) { # Strip out wikitext links(they break the anchor) @@ -5378,7 +5610,8 @@ class Parser { $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); - # Strip external link markup (FIXME: Not Tolerant to blank link text + # Strip external link markup + # @todo FIXME: Not tolerant to blank link text # I.E. 
[http://www.mediawiki.org] will render as [1] or something depending # on how many empty links there are on the page - need to figure that out. $text = preg_replace( '/\[(?:' . wfUrlProtocols() . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); @@ -5394,12 +5627,14 @@ class Parser { /** * strip/replaceVariables/unstrip for preprocessor regression testing * + * @param $text string + * @param $title Title + * @param $options ParserOptions + * @param $outputType int + * * @return string */ - function testSrvus( $text, $title, ParserOptions $options, $outputType = self::OT_HTML ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + function testSrvus( $text, Title $title, ParserOptions $options, $outputType = self::OT_HTML ) { $this->startParse( $title, $options, $outputType, true ); $text = $this->replaceVariables( $text ); @@ -5408,18 +5643,23 @@ class Parser { return $text; } - function testPst( $text, $title, $options ) { - global $wgUser; - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } - return $this->preSaveTransform( $text, $title, $wgUser, $options ); + /** + * @param $text string + * @param $title Title + * @param $options ParserOptions + * @return string + */ + function testPst( $text, Title $title, ParserOptions $options ) { + return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); } - function testPreprocess( $text, $title, $options ) { - if ( !$title instanceof Title ) { - $title = Title::newFromText( $title ); - } + /** + * @param $text + * @param $title Title + * @param $options ParserOptions + * @return string + */ + function testPreprocess( $text, Title $title, ParserOptions $options ) { return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); } @@ -5434,6 +5674,9 @@ class Parser { * two strings will be replaced with the value returned by the callback in * each case. 
* + * @param $s string + * @param $callback + * * @return string */ function markerSkipCallback( $s, $callback ) { @@ -5472,6 +5715,8 @@ class Parser { * unserializeHalfParsedText(). The text can then be safely incorporated into * the return value of a parser hook. * + * @param $text string + * * @return array */ function serializeHalfParsedText( $text ) {