Don't use the parser cache for non-existent articles
[lhc/web/wiklou.git] / includes / Parser.php
index f04c371..f35a47c 100644 (file)
@@ -1,11 +1,12 @@
 <?php
-
 /**
  * File for Parser and related classes
  *
  * @package MediaWiki
+ * @subpackage Parser
  */
 
+/** */
 require_once( 'Sanitizer.php' );
 
 /**
@@ -13,7 +14,7 @@ require_once( 'Sanitizer.php' );
  * changes in an incompatible way, so the parser cache
  * can automatically discard old data.
  */
-define( 'MW_PARSER_VERSION', '1.4.0' );
+define( 'MW_PARSER_VERSION', '1.5.0' );
 
 /**
  * Variable substitution O(N^2) attack
@@ -172,15 +173,25 @@ class Parser
                $text = $this->strip( $text, $x );
 
                $text = $this->internalParse( $text, $linestart );
+
+               $dashReplace = array(
+                       '/ - /' => "&nbsp;&ndash; ", # N dash
+                       '/(?<=[0-9])-(?=[0-9])/' => "&ndash;", # N dash between numbers
+                       '/ -- /' => "&nbsp;&mdash; " # M dash
+               );
+               $text = preg_replace( array_keys($dashReplace), array_values($dashReplace), $text );
+               
+               
                $text = $this->unstrip( $text, $this->mStripState );
                # Clean up special characters, only run once, next-to-last before doBlockLevels
+               global $wgUseTidy;
                if(!$wgUseTidy) {
                        $fixtags = array(
                                # french spaces, last one Guillemet-left
                                # only if there is something before the space
-                               '/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
+                               '/(.) (?=\\?|:|;|!|\\302\\273)/' => '\\1&nbsp;\\2',
                                # french spaces, Guillemet-right
-                               '/(\\302\\253) /i' => '\\1&nbsp;',
+                               '/(\\302\\253) /' => '\\1&nbsp;',
                                '/<hr *>/i' => '<hr />',
                                '/<br *>/i' => '<br />',
                                '/<center *>/i' => '<div class="center">',
@@ -191,9 +202,9 @@ class Parser
                } else {
                        $fixtags = array(
                                # french spaces, last one Guillemet-left
-                               '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
+                               '/ (\\?|:|;|!|\\302\\273)/' => '&nbsp;\\1',
                                # french spaces, Guillemet-right
-                               '/(\\302\\253) /i' => '\\1&nbsp;',
+                               '/(\\302\\253) /' => '\\1&nbsp;',
                                '/<center *>/i' => '<div class="center">',
                                '/<\\/center *>/i' => '</div>'
                        );
@@ -204,9 +215,8 @@ class Parser
 
                $this->replaceLinkHolders( $text );
                $text = $wgContLang->convert($text);
-
+               $this->mOutput->setTitleText($wgContLang->getParsedTitle());
                $text = $this->unstripNoWiki( $text, $this->mStripState );
-               global $wgUseTidy;
                if ($wgUseTidy) {
                        $text = Parser::tidy($text);
                }
@@ -249,7 +259,7 @@ class Parser
 
                while ( '' != $text ) {
                        if($tag==STRIP_COMMENTS) {
-                               $p = preg_split( '/<!--/i', $text, 2 );
+                               $p = preg_split( '/<!--/', $text, 2 );
                        } else {
                                $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
                        }
@@ -475,22 +485,11 @@ class Parser
         */
        function tidy ( $text ) {
                global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
-               global $wgInputEncoding, $wgOutputEncoding;
                $fname = 'Parser::tidy';
                wfProfileIn( $fname );
 
                $cleansource = '';
-               $opts = '';
-               switch(strtoupper($wgOutputEncoding)) {
-                       case 'ISO-8859-1':
-                               $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
-                               break;
-                       case 'UTF-8':
-                               $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
-                               break;
-                       default:
-                               $opts .= ' -raw';
-                       }
+               $opts = ' -utf8';
 
                $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
@@ -542,8 +541,7 @@ class Parser
                        $fc = substr ( $x , 0 , 1 ) ;
                        if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
                                $indent_level = strlen( $matches[1] );
-                               $t[$k] = "\n" .
-                                       str_repeat( '<dl><dd>', $indent_level ) .
+                               $t[$k] = str_repeat( '<dl><dd>', $indent_level ) .
                                        '<table' . Sanitizer::fixTagAttributes ( $matches[2], 'table' ) . '>' ;
                                array_push ( $td , false ) ;
                                array_push ( $ltd , '' ) ;
@@ -552,7 +550,7 @@ class Parser
                        }
                        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
                        else if ( '|}' == substr ( $x , 0 , 2 ) ) {
-                               $z = "</table>\n" ;
+                               $z = "</table>" . substr ( $x , 2);
                                $l = array_pop ( $ltd ) ;
                                if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
                                if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
@@ -1153,6 +1151,14 @@ class Parser
                        
                        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
                                $text = $m[2];
+                               # If $m[3] begins with a ], the link looks something like:
+                               # [[Image:Foo.jpg|[http://example.com desc]]] <- three ] in a row break the match;
+                               # the real problem is with the $e1 regex.
+                               # See bug 1300.
+                               if (preg_match( "/^\](.*)/", $m[3], $n ) ) {
+                                       $text .= ']'; # so that replaceExternalLinks($text) works later
+                                       $m[3] = $n[1];
+                               }
                                # fix up urlencoded title texts
                                if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
                                $trail = $m[3];
@@ -1249,31 +1255,35 @@ class Parser
                                # Interwikis
                                if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
                                        array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
-                                       $tmp = $prefix . $trail ;
-                                       $s .= (trim($tmp) == '')? '': $tmp;
+                                       $s = rtrim($s . "\n");
+                                       $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
                                        continue;
                                }
                                
                                if ( $ns == NS_IMAGE ) {
                                        wfProfileIn( "$fname-image" );
-                                       
-                                       # recursively parse links inside the image caption
-                                       # actually, this will parse them in any other parameters, too,
-                                       # but it might be hard to fix that, and it doesn't matter ATM
-                                       $text = $this->replaceExternalLinks($text);
-                                       $text = $this->replaceInternalLinks($text);
-                                       
-                                       # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
-                                       $s .= $prefix . str_replace('http://', 'http-noparse://', $sk->makeImageLinkObj( $nt, $text ) ) . $trail;
-                                       $wgLinkCache->addImageLinkObj( $nt );
-                                       
+                                       if ( !wfIsBadImage( $nt->getDBkey() ) ) {
+                                               # recursively parse links inside the image caption
+                                               # actually, this will parse them in any other parameters, too,
+                                               # but it might be hard to fix that, and it doesn't matter ATM
+                                               $text = $this->replaceExternalLinks($text);
+                                               $text = $this->replaceInternalLinks($text);
+                                               
+                                               # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
+                                               $s .= $prefix . str_replace('http://', 'http-noparse://', $sk->makeImageLinkObj( $nt, $text ) ) . $trail;
+                                               $wgLinkCache->addImageLinkObj( $nt );
+                                               
+                                               wfProfileOut( "$fname-image" );
+                                               continue;
+                                       }
                                        wfProfileOut( "$fname-image" );
-                                       continue;
+
                                }
                                
                                if ( $ns == NS_CATEGORY ) {
                                        wfProfileIn( "$fname-category" );
                                        $t = $nt->getText();
+                                       $s = rtrim($s . "\n"); # bug 87
 
                                        $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                                        $pPLC=$sk->postParseLinkColour();
@@ -1293,7 +1303,12 @@ class Parser
                                        }
                                        $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                                        $this->mOutput->addCategoryLink( $t );
-                                       $s .= $prefix . $trail ;
+                                       
+                                       /**
+                                        * Strip the whitespace Category links produce, see bug 87.
+                                        * Only "\n" is trimmed below: a prefix/trail of nothing but newlines is dropped.
+                                        */
+                                       $s .= trim($prefix . $trail, "\n") == '' ? '': $prefix . $trail;
                                        
                                        wfProfileOut( "$fname-category" );
                                        continue;
@@ -1687,7 +1702,7 @@ class Parser
         * @access private
         */
        function getVariableValue( $index ) {
-               global $wgContLang, $wgSitename, $wgServer;
+               global $wgContLang, $wgSitename, $wgServer, $wgArticle;
                
                /**
                 * Some of these require message or data lookups and can be
@@ -1703,19 +1718,23 @@ class Parser
                                return $varCache[$index] = $wgContLang->getMonthName( date('n') );
                        case MAG_CURRENTMONTHNAMEGEN:
                                return $varCache[$index] = $wgContLang->getMonthNameGen( date('n') );
+                       case MAG_CURRENTMONTHABBREV:
+                               return $varCache[$index] = $wgContLang->getMonthAbbreviation( date('n') );
                        case MAG_CURRENTDAY:
                                return $varCache[$index] = $wgContLang->formatNum( date('j') );
                        case MAG_PAGENAME:
                                return $this->mTitle->getText();
                        case MAG_PAGENAMEE:
                                return $this->mTitle->getPartialURL();
+                       case MAG_REVISIONID:
+                               return $wgArticle->getRevIdFetched();
                        case MAG_NAMESPACE:
                                # return Namespace::getCanonicalName($this->mTitle->getNamespace());
                                return $wgContLang->getNsText($this->mTitle->getNamespace()); # Patch by Dori
                        case MAG_CURRENTDAYNAME:
                                return $varCache[$index] = $wgContLang->getWeekdayName( date('w')+1 );
                        case MAG_CURRENTYEAR:
-                               return $varCache[$index] = $wgContLang->formatNum( date( 'Y' ) );
+                               return $varCache[$index] = $wgContLang->formatNum( date( 'Y' ), true );
                        case MAG_CURRENTTIME:
                                return $varCache[$index] = $wgContLang->time( wfTimestampNow(), false );
                        case MAG_CURRENTWEEK:
@@ -1841,7 +1860,7 @@ class Parser
                # merged with the next arg because the '|' character between belongs
                # to the link syntax and not the template parameter syntax.
                $argc = count($args);
-               $i = 0;
+               
                for ( $i = 0; $i < $argc-1; $i++ ) {
                        if ( substr_count ( $args[$i], '[[' ) != substr_count ( $args[$i], ']]' ) ) {
                                $args[$i] .= '|'.$args[$i+1];
@@ -2180,14 +2199,12 @@ class Parser
                global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders, $wgInterwikiLinkHolders;
 
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
-               $doShowToc = $this->mOptions->getShowToc();
+               $doShowToc = true;
                $forceTocHere = false;
                if( !$this->mTitle->userCanEdit() ) {
                        $showEditLink = 0;
-                       $rightClickHack = 0;
                } else {
                        $showEditLink = $this->mOptions->getEditSection();
-                       $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
                }
 
                # Inhibit editsection links if requested in the page
@@ -2199,13 +2216,7 @@ class Parser
                # do not add TOC
                $mw =& MagicWord::get( MAG_NOTOC );
                if( $mw->matchAndRemove( $text ) ) {
-                       $doShowToc = 0;
-               }
-
-               # never add the TOC to the Main Page. This is an entry page that should not
-               # be more than 1-2 screens large anyway
-               if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
-                       $doShowToc = 0;
+                       $doShowToc = false;
                }
 
                # Get all headlines for numbering them and adding funky stuff like [edit]
@@ -2214,7 +2225,7 @@ class Parser
 
                # if there are fewer than 4 headlines in the article, do not show TOC
                if( $numMatches < 4 ) {
-                       $doShowToc = 0;
+                       $doShowToc = false;
                }
 
                # if the string __TOC__ (not case-sensitive) occurs in the HTML,
@@ -2222,20 +2233,20 @@ class Parser
 
                $mw =& MagicWord::get( MAG_TOC );
                if($mw->match( $text ) ) {
-                       $doShowToc = 1;
+                       $doShowToc = true;
                        $forceTocHere = true;
                } else {
                        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
                        # override above conditions and always show TOC above first header
                        $mw =& MagicWord::get( MAG_FORCETOC );
                        if ($mw->matchAndRemove( $text ) ) {
-                               $doShowToc = 1;
+                               $doShowToc = true;
                        }
                }
 
                # Never ever show TOC if no headers
                if( $numMatches < 1 ) {
-                       $doShowToc = 0;
+                       $doShowToc = false;
                }
 
                # We need this to perform operations on the HTML
@@ -2384,14 +2395,6 @@ class Parser
                                        $head[$headlineCount] .= $sk->editSectionLink($this->mTitle, $sectionCount+1);
                        }
 
-                       # Add the edit section span
-                       if( $rightClickHack ) {
-                               if( $istemplate )
-                                       $headline = $sk->editSectionScriptForOther($templatetitle, $templatesection, $headline);
-                               else
-                                       $headline = $sk->editSectionScript($this->mTitle, $sectionCount+1,$headline);
-                       }
-
                        # give headline the correct <h#> tag
                        @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';
 
@@ -2454,7 +2457,7 @@ class Parser
                        return $text;
                }
                $text = substr( array_shift( $a ), 1);
-               $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+               $valid = '0123456789-Xx';
 
                foreach ( $a as $x ) {
                        $isbn = $blank = '' ;
@@ -2472,6 +2475,7 @@ class Parser
                        }
                        $num = str_replace( '-', '', $isbn );
                        $num = str_replace( ' ', '', $num );
+                       $num = str_replace( 'x', 'X', $num );
 
                        if ( '' == $num ) {
                                $text .= "ISBN $blank$x";
@@ -2489,8 +2493,12 @@ class Parser
 
        /**
         * Return an HTML link for the "RFC 1234" text
+        *
         * @access private
-        * @param string $text text to be processed
+        * @param string $text     Text to be processed
+        * @param string $keyword  Magic keyword to use (default RFC)
+        * @param string $urlmsg   Interface message to use (default rfcurl)
+        * @return string
         */
        function magicRFC( $text, $keyword='RFC ', $urlmsg='rfcurl'  ) {
                global $wgLang;
@@ -2541,8 +2549,7 @@ class Parser
                                $text .= $keyword.$id.$x;
                        } else {
                                /* build the external link*/
-                               $url = wfmsg( $urlmsg );
-                               $url = str_replace( '$1', $id, $url);
+                               $url = wfMsg( $urlmsg, $id);
                                $sk =& $this->mOptions->getSkin();
                                $la = $sk->getExternalLinkAttributes( $url, $keyword.$id );
                                $text .= "<a href='{$url}'{$la}>{$keyword}{$id}</a>{$x}";
@@ -2607,8 +2614,13 @@ class Parser
                        $oldtz = getenv( 'TZ' );
                        putenv( 'TZ='.$wgLocaltimezone );
                }
-               /* Note: this is an ugly timezone hack for the European wikis */
-               $d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
+
+               /* Note: This is the timestamp saved as hardcoded wikitext to
+                * the database. We use $wgContLang here in order to give
+                * everyone the same signature and use the default one rather
+                * than the one selected in each user's preferences.
+                */
+               $d = $wgContLang->timeanddate( wfTimestampNow(), false, false) .
                  ' (' . date( 'T' ) . ')';
                if ( isset( $wgLocaltimezone ) ) {
                        putenv( 'TZ='.$oldtzs );
@@ -2770,16 +2782,11 @@ class Parser
                                        # Not in the link cache, add it to the query
                                        if ( !isset( $current ) ) {
                                                $current = $val;
-                                               $tables = $page;
-                                               $join = '';
                                                $query =  "SELECT page_id, page_namespace, page_title";
                                                if ( $threshold > 0 ) {
-                                                       $textTable = $dbr->tableName( 'text' );
-                                                       $query .= ', LENGTH(old_text) AS page_len, page_is_redirect';
-                                                       $tables .= ", $textTable";
-                                                       $join = 'page_latest=old_id AND';
+                                                       $query .= ', page_len, page_is_redirect';
                                                }
-                                               $query .= " FROM $tables WHERE $join (page_namespace=$val AND page_title IN(";
+                                               $query .= " FROM $page WHERE (page_namespace=$val AND page_title IN(";
                                        } elseif ( $current != $val ) {
                                                $current = $val;
                                                $query .= ")) OR (page_namespace=$val AND page_title IN(";
@@ -2880,6 +2887,10 @@ class Parser
         * 'A tree'.
         */
        function renderImageGallery( $text ) {
+               # Setup the parser
+               global $wgUser, $wgParser, $wgTitle;
+               $parserOptions = ParserOptions::newFromUser( $wgUser );
+       
                global $wgLinkCache;
                $ig = new ImageGallery();
                $ig->setShowBytes( false );
@@ -2905,11 +2916,10 @@ class Parser
                                $label = '';
                        }
                        
-                       # FIXME: Use the full wiki parser and add its links
-                       # to the page's links.
-                       $html = $this->mOptions->mSkin->formatComment( $label );
+                       $html = $wgParser->parse( $label , $wgTitle, $parserOptions );
+                       $html = $html->mText;
                        
-                       $ig->add( Image::newFromTitle( $nt ), $html );
+                       $ig->add( new Image( $nt ), $html );
                        $wgLinkCache->addImageLinkObj( $nt );
                }
                return $ig->toHTML();
@@ -2925,9 +2935,10 @@ class ParserOutput
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache
        var $mVersion;   # Compatibility check
+       var $mTitleText; # title text of the chosen language variant
 
        function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
-               $containsOldMagic = false )
+               $containsOldMagic = false, $titletext = '' )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
@@ -2935,18 +2946,22 @@ class ParserOutput
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = '';
                $this->mVersion = MW_PARSER_VERSION;
+               $this->mTitleText = $titletext;
        }
 
        function getText()                   { return $this->mText; }
        function getLanguageLinks()          { return $this->mLanguageLinks; }
        function getCategoryLinks()          { return array_keys( $this->mCategoryLinks ); }
        function getCacheTime()              { return $this->mCacheTime; }
+       function getTitleText()              { return $this->mTitleText; }
        function containsOldMagic()          { return $this->mContainsOldMagic; }
        function setText( $text )            { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll )     { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl )     { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t )          { return wfSetVar( $this->mCacheTime, $t ); }
+       function setTitleText( $t )          { return wfSetVar ($this->mTitleText, $t); }
+
        function addCategoryLink( $c )       { $this->mCategoryLinks[$c] = 1; }
 
        function merge( $other ) {
@@ -2988,9 +3003,7 @@ class ParserOptions
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
-       var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
-       var $mShowToc;                   # Show table of contents
 
        function getUseTeX()                        { return $this->mUseTeX; }
        function getUseDynamicDates()               { return $this->mUseDynamicDates; }
@@ -2999,9 +3012,7 @@ class ParserOptions
        function getSkin()                          { return $this->mSkin; }
        function getDateFormat()                    { return $this->mDateFormat; }
        function getEditSection()                   { return $this->mEditSection; }
-       function getEditSectionOnRightClick()       { return $this->mEditSectionOnRightClick; }
        function getNumberHeadings()                { return $this->mNumberHeadings; }
-       function getShowToc()                       { return $this->mShowToc; }
 
        function setUseTeX( $x )                    { return wfSetVar( $this->mUseTeX, $x ); }
        function setUseDynamicDates( $x )           { return wfSetVar( $this->mUseDynamicDates, $x ); }
@@ -3009,9 +3020,7 @@ class ParserOptions
        function setAllowExternalImages( $x )       { return wfSetVar( $this->mAllowExternalImages, $x ); }
        function setDateFormat( $x )                { return wfSetVar( $this->mDateFormat, $x ); }
        function setEditSection( $x )               { return wfSetVar( $this->mEditSection, $x ); }
-       function setEditSectionOnRightClick( $x )   { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
        function setNumberHeadings( $x )            { return wfSetVar( $this->mNumberHeadings, $x ); }
-       function setShowToc( $x )                   { return wfSetVar( $this->mShowToc, $x ); }
 
        function setSkin( &$x ) { $this->mSkin =& $x; }
 
@@ -3046,9 +3055,7 @@ class ParserOptions
                wfProfileOut( $fname.'-skin' );
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
-               $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
-               $this->mShowToc = $user->getOption( 'showtoc' );
                wfProfileOut( $fname );
        }
 }