Parser cache moved to memcached

[lhc/web/wiklou.git] / includes / Parser.php
diff --git a/includes/Parser.php b/includes/Parser.php

index e65acb3..200c8f4 100644 (file)
--- a/includes/Parser.php
+++ b/includes/Parser.php
@@ -1,6 +1,6 @@
  <?php
  
-require_once('Tokenizer.php');
+// require_once('Tokenizer.php');
  
  if( $GLOBALS['wgUseWikiHiero'] ){
         require_once('extensions/wikihiero/wikihiero.php');
@@ -105,6 +105,10 @@ class Parser
                 # Clean up special characters, only run once, next-to-last before doBlockLevels
                 if(!$wgUseTidy) {
                         $fixtags = array(
+                               # french spaces, last one Guillemet-left
+                               "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1", 
+                               # french spaces, Guillemet-right
+                               "/(\\302\\253) /i"=>"\\1&nbsp;", 
                                 "/<hr *>/i" => '<hr/>',
                                 "/<br *>/i" => '<br/>',
                                 "/<center *>/i"=>'<div class="center">',
@@ -116,6 +120,10 @@ class Parser
                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
                 } else {
                         $fixtags = array(
+                               # french spaces, last one Guillemet-left
+                               "/ (\\?|:|!|\\302\\273)/i"=>"&nbsp;\\1", 
+                               # french spaces, Guillemet-right
+                               "/(\\302\\253) /i"=>"\\1&nbsp;", 
                                 "/<center *>/i"=>'<div class="center">',
                                 "/<\\/center *>/i" => '</div>'
                         );
@@ -192,6 +200,7 @@ class Parser
                 $render = ($this->mOutputType == OT_HTML);
                 $nowiki_content = array();
                 $hiero_content = array();
+               $timeline_content = array();
                 $math_content = array();
                 $pre_content = array();
                 $comment_content = array();
@@ -217,6 +226,15 @@ class Parser
                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
                         }
                 }
+               
+               $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
+               foreach( $timeline_content as $marker => $content ){
+                       if( $render && $GLOBALS['wgUseTimeline']){
+                               $timeline_content[$marker] = renderTimeline( $content );
+                       } else {
+                               $timeline_content[$marker] = "<timeline>$content</timeline>";
+                       }
+               }
  
                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
                 foreach( $math_content as $marker => $content ){
@@ -250,6 +268,7 @@ class Parser
                 if ( $state ) {
                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
                         $state['hiero'] = $state['hiero'] + $hiero_content;
+                       $state['timeline'] = $state['timeline'] + $timeline_content;
                         $state['math'] = $state['math'] + $math_content;
                         $state['pre'] = $state['pre'] + $pre_content;
                         $state['comment'] = $state['comment'] + $comment_content;
@@ -257,6 +276,7 @@ class Parser
                         $state = array(
                           'nowiki' => $nowiki_content,
                           'hiero' => $hiero_content,
+                         'timeline' => $timeline_content,
                           'math' => $math_content,
                           'pre' => $pre_content,
                           'comment' => $comment_content
@@ -316,18 +336,11 @@ class Parser
                 $data = array () ;
                 $id = $this->mTitle->getArticleID() ;
  
-               # For existing categories
-               if( $id ) {
-                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
-                       $res = wfQuery ( $sql, DB_READ ) ;
-                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-               } else {
-                       # For non-existing categories
-                       $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
-                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
-                       $res = wfQuery ( $sql, DB_READ ) ;
-                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-               }
+               # FIXME: add limits
+               $t = wfStrencode( $this->mTitle->getDBKey() );
+               $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
+               $res = wfQuery ( $sql, DB_READ ) ;
+               while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
  
                 # For all pages that link to this category
                 foreach ( $data AS $x )
@@ -345,18 +358,14 @@ class Parser
                 wfFreeResult ( $res ) ;
  
                 # Showing subcategories
-               if ( count ( $children ) > 0 )
-               {
-                       asort ( $children ) ;
+               if ( count ( $children ) > 0 ) {
                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                         $r .= implode ( ", " , $children ) ;
                 }
  
                 # Showing pages in this category
-               if ( count ( $articles ) > 0 )
-               {
+               if ( count ( $articles ) > 0 ) {
                         $ti = $this->mTitle->getText() ;
-                       asort ( $articles ) ;
                         $h =  wfMsg( "category_header", $ti );
                         $r .= "<h2>{$h}</h2>\n" ;
                         $r .= implode ( ", " , $articles ) ;
@@ -547,6 +556,18 @@ class Parser
                 return $t ;
         }
  
+       # Renders a fragment of wikitext and parks the result in the strip state.
+       #
+       # The fragment is first run through strip() against $this->mStripState,
+       # then through internalParse() with $isMain = false. When $linestart is
+       # true a newline is put in front of the rendered output before it is
+       # stored.
+       #
+       # Returns whatever insertStripItem() hands back for the stored text
+       # (the strip tag, per the original note on this method).
+       function stripParse( $text, $linestart, $args )
+       {
+               $stripped = $this->strip( $text, $this->mStripState );
+               $rendered = $this->internalParse( $stripped, $linestart, $args, false );
+               $item = $linestart ? "\n" . $rendered : $rendered;
+               return $this->insertStripItem( $item, $this->mStripState );
+       }
+       
         function internalParse( $text, $linestart, $args = array(), $isMain=true )
         {
                 $fname = "Parser::internalParse";
@@ -555,24 +576,29 @@ class Parser
                 $text = $this->removeHTMLtags( $text );
                 $text = $this->replaceVariables( $text, $args );
  
-               # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
+               $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
  
                 $text = $this->doHeadings( $text );
                 if($this->mOptions->getUseDynamicDates()) {
                         global $wgDateFormatter;
                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
                 }
+               $text = $this->doAllQuotes( $text );
                 $text = $this->replaceExternalLinks( $text );
-               $text = $this->doTokenizedParser ( $text );
+               $text = $this->replaceInternalLinks ( $text );
+               $text = $this->replaceInternalLinks ( $text );
+               //$text = $this->doTokenizedParser ( $text );
                 $text = $this->doTableStuff ( $text ) ;
+               $text = $this->magicISBN( $text );
+               $text = $this->magicRFC( $text );
                 $text = $this->formatHeadings( $text, $isMain );
                 $sk =& $this->mOptions->getSkin();
                 $text = $sk->transformContent( $text );
  
                 if ( !isset ( $this->categoryMagicDone ) ) {
-                  $text .= $this->categoryMagic () ;
-                  $this->categoryMagicDone = true ;
-                  }
+                       $text .= $this->categoryMagic () ;
+                       $this->categoryMagicDone = true ;
+               }
  
                 wfProfileOut( $fname );
                 return $text;
@@ -589,6 +615,64 @@ class Parser
                 return $text;
         }
  
+       # Applies the ''emphasis'' / '''strong''' quote conversion to $text one
+       # line at a time, so quote markup left open on one line is closed at the
+       # end of that line instead of running on into the lines that follow.
+       #
+       # Lines may end in "\r\n" or in a bare "\n". Each terminator is copied to
+       # the output unchanged and nothing is appended after the last line, so
+       # text without any quote markup comes back byte-for-byte.
+       #
+       # $text   - wikitext to convert
+       # returns - $text with every line passed through doQuotes()
+       /* private */ function doAllQuotes( $text )
+       {
+               # PREG_SPLIT_DELIM_CAPTURE keeps the terminators in the result:
+               # even indexes hold line contents, odd indexes hold the "\r\n"
+               # or "\n" that followed them.
+               $pieces = preg_split( "/(\r\n|\n)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+               $outtext = "";
+               foreach ( $pieces as $index => $piece ) {
+                       if ( $index % 2 ) {
+                               $outtext .= $piece;
+                       } else {
+                               $outtext .= $this->doQuotes ( "", $piece, "" );
+                       }
+               }
+               return $outtext;
+       }
+
+       # Converts quote markup in $text to HTML by recursing on the first run of
+       # quotes it finds: '' toggles <em>, ''' toggles <strong>. When no quote
+       # run is left, whatever is still open is closed at the end of $text.
+       # Top-level callers pass "" for both $pre and $mode.
+       #
+       # $pre  - text already scanned that belongs inside the outer open tag but
+       #         in front of the inner one; only non-empty in the nested modes
+       # $text - the text still to be scanned
+       # $mode - which tags are currently open:
+       #           ""         none
+       #           "em"       <em> only
+       #           "strong"   <strong> only
+       #           "emstrong" <em> opened first, <strong> opened inside it
+       #           "strongem" <strong> opened first, <em> opened inside it
+       #           "both"     both opened with no text between the openers,
+       #                      so the nesting order is not decided yet
+       # returns the converted string
+       /* private */ function doQuotes( $pre, $text, $mode )
+       {
+               # /s lets "." match newlines and /U makes the quantifiers ungreedy,
+               # so $m[1] is the text in front of the first '' and $m[2] is what
+               # follows it.
+               if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
+                       # Pre-wrapped copies of the leading text; an empty lead yields ""
+                       # so that no empty tag pair is produced.
+                       $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
+                       $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
+                       if ( substr ($m[2], 0, 1) == "'" ) {
+                               # A third quote follows: this is a ''' (strong) marker.
+                               # Drop that quote from the remaining text.
+                               $m[2] = substr ($m[2], 1);
+                               if ($mode == "em") {
+                                       # strong opens inside an open em; carry the em-only
+                                       # text along as $pre
+                                       return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
+                               } else if ($mode == "strong") {
+                                       # strong closes; continue with nothing open
+                                       return $m1_strong . $this->doQuotes ( "", $m[2], "" );
+                               } else if (($mode == "emstrong") || ($mode == "both")) {
+                                       # inner strong closes while em stays open: splice the
+                                       # finished <strong> back into the text and rescan the
+                                       # whole of it in "em" mode
+                                       return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
+                               } else if ($mode == "strongem") {
+                                       # outer strong closes while the inner em is open: emit
+                                       # both closed, then continue in "em" mode
+                                       return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
+                               } else {
+                                       # nothing open: leading text is plain, strong opens
+                                       return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
+                               }
+                       } else {
+                               # Exactly two quotes: this is a '' (em) marker. The branches
+                               # mirror the ''' case with the roles of em and strong swapped.
+                               if ($mode == "strong") {
+                                       # em opens inside an open strong
+                                       return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
+                               } else if ($mode == "em") {
+                                       # em closes; continue with nothing open
+                                       return $m1_em . $this->doQuotes ( "", $m[2], "" );
+                               } else if ($mode == "emstrong") {
+                                       # outer em closes while the inner strong is open
+                                       return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
+                               } else if (($mode == "strongem") || ($mode == "both")) {
+                                       # inner em closes while strong stays open: rescan in
+                                       # "strong" mode with the finished <em> spliced in
+                                       return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
+                               } else {
+                                       # nothing open: leading text is plain, em opens
+                                       return $m[1] . $this->doQuotes ( "", $m[2], "em" );
+                               }
+                       }
+               } else {
+                       # No quote run left in $text: close whatever is still open.
+                       $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
+                       $text_em = ($text == "") ? "" : "<em>{$text}</em>";
+                       if ($mode == "") {
+                               return $pre . $text;
+                       } else if ($mode == "em") {
+                               return $pre . $text_em;
+                       } else if ($mode == "strong") {
+                               return $pre . $text_strong;
+                       } else if ($mode == "strongem") {
+                               return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
+                       } else {
+                               # "emstrong" or "both": em is treated as the outer tag
+                               return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
+                       }
+               }
+       }
+
         # Note: we have to do external links before the internal ones,
         # and otherwise take great care in the order of things here, so
         # that we don't end up interpreting some URLs twice.
@@ -675,265 +759,24 @@ class Parser
                 return $s;
         }
  
-       /* private */ function handle4Quotes( &$state, $token )
-       {
-               /* This one makes some assumptions. 
-                * '''Caesar''''s army  => <strong>Caesar</strong>'s army
-                * ''''Caesar'''' was a roman emperor => '<strong>Caesar</strong>' was a roman emperor
-                * These assumptions might be wrong, but any other assumption might be wrong, too.
-                * So here we go */
-               if ( $state["strong"] !== false ) {
-                       return $this->handle3Quotes( $state, $token ) . "'";
-               } else {
-                       return "'" . $this->handle3Quotes( $state, $token );
-               }
-       }
-
-
-       /* private */ function handle3Quotes( &$state, $token )
-       {
-               if ( $state["strong"] !== false ) {
-                       if ( $state["em"] !== false && $state["em"] > $state["strong"] )
-                       {
-                               # ''' lala ''lala '''
-                               $s = "</em></strong><em>";
-                       } else {
-                               $s = "</strong>";
-                       }
-                       $state["strong"] = FALSE;
-               } else {
-                       $s = "<strong>";
-                       $state["strong"] = $token["pos"];
-               }
-               return $s;
-       }
-
-       /* private */ function handle2Quotes( &$state, $token )
-       {
-               if ( $state["em"] !== false ) {
-                       if ( $state["strong"] !== false && $state["strong"] > $state["em"] )
-                       {
-                               # ''lala'''lala'' ....'''
-                               $s = "</strong></em><strong>";
-                       } else {
-                               $s = "</em>";
-                       }
-                       $state["em"] = FALSE;
-               } else {
-                       $s = "<em>";
-                       $state["em"] = $token["pos"];
-
-               }
-               return $s;
-       }
-
-       /* private */ function handle5Quotes( &$state, $token )
-       {
-               $s = "";
-               if ( $state["em"] !== false && $state["strong"] !== false ) {
-                       if ( $state["em"] < $state["strong"] ) {
-                               $s .= "</strong></em>";
-                       } else {
-                               $s .= "</em></strong>";
-                       }
-                       $state["strong"] = $state["em"] = FALSE;
-               } elseif ( $state["em"] !== false ) {
-                       $s .= "</em><strong>";
-                       $state["em"] = FALSE;
-                       $state["strong"] = $token["pos"];
-               } elseif ( $state["strong"] !== false ) {
-                       $s .= "</strong><em>";
-                       $state["strong"] = FALSE;
-                       $state["em"] = $token["pos"];
-               } else { # not $em and not $strong
-                       $s .= "<strong><em>";
-                       $state["strong"] = $state["em"] = $token["pos"];
-               }
-               return $s;
-       }
  
-       /* private */ function doTokenizedParser( $str )
-       {
-               global $wgLang; # for language specific parser hook
-               global $wgUploadDirectory, $wgUseTimeline;
-
-               $tokenizer=Tokenizer::newFromString( $str );
-               $tokenStack = array();
-
-               $s="";
-               $state["em"]      = FALSE;
-               $state["strong"]  = FALSE;
-               $tagIsOpen = FALSE;
-               $threeopen = false;
-
-               # The tokenizer splits the text into tokens and returns them one by one.
-               # Every call to the tokenizer returns a new token.
-               while ( $token = $tokenizer->nextToken() )
-               {
-                       switch ( $token["type"] )
-                       {
-                               case "text":
-                                       # simple text with no further markup
-                                       $txt = $token["text"];
-                                       break;
-                               case "blank":
-                                       # Text that contains blanks that have to be converted to
-                                       # non-breakable spaces for French.
-                                       # U+202F NARROW NO-BREAK SPACE might be a better choice, but
-                                       # browser support for Unicode spacing is poor.
-                                       $txt = str_replace( " ", "&nbsp;", $token["text"] );
-                                       break;
-                               case "[[[":
-                                       # remember the tag opened with 3 [
-                                       $threeopen = true;
-                               case "[[":
-                                       # link opening tag.
-                                       # FIXME : Treat orphaned open tags (stack not empty when text is over)
-                                       $tagIsOpen = TRUE;
-                                       array_push( $tokenStack, $token );
-                                       $txt="";
-                                       break;
-
-                               case "]]]":
-                               case "]]":
-                                       # link close tag.
-                                       # get text from stack, glue it together, and call the code to handle a
-                                       # link
-
-                                       if ( count( $tokenStack ) == 0 )
-                                       {
-                                               # stack empty. Found a ]] without an opening [[
-                                               $txt = "]]";
-                                       } else {
-                                               $linkText = "";
-                                               $lastToken = array_pop( $tokenStack );
-                                               while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) )
-                                               {
-                                                       if( !empty( $lastToken["text"] ) ) {
-                                                               $linkText = $lastToken["text"] . $linkText;
-                                                       }
-                                                       $lastToken = array_pop( $tokenStack );
-                                               }
-
-                                               $txt = $linkText ."]]";
-
-                                               if( isset( $lastToken["text"] ) ) {
-                                                       $prefix = $lastToken["text"];
-                                               } else {
-                                                       $prefix = "";
-                                               }
-                                               $nextToken = $tokenizer->previewToken();
-                                               if ( $nextToken["type"] == "text" )
-                                               {
-                                                       # Preview just looks at it. Now we have to fetch it.
-                                                       $nextToken = $tokenizer->nextToken();
-                                                       $txt .= $nextToken["text"];
-                                               }
-                                               $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix );
-
-                                               # did the tag start with 3 [ ?
-                                               if($threeopen) {
-                                                       # show the first as text
-                                                       $txt = "[".$txt;
-                                                       $threeopen=false;
-                                               }
-
-                                       }
-                                       $tagIsOpen = (count( $tokenStack ) != 0);
-                                       break;
-                               case "----":
-                                       $txt = "\n<hr />\n";
-                                       break;
-                               case "'''":
-                                       # This and the four next ones handle quotes
-                                       $txt = $this->handle3Quotes( $state, $token );
-                                       break;
-                               case "''":
-                                       $txt = $this->handle2Quotes( $state, $token );
-                                       break;
-                               case "'''''":
-                                       $txt = $this->handle5Quotes( $state, $token );
-                                       break;
-                               case "''''":
-                                       $txt = $this->handle4Quotes( $state, $token );
-                                       break;
-                               case "":
-                                       # empty token
-                                       $txt="";
-                                       break;
-                               case "RFC ":
-                                       if ( $tagIsOpen ) {
-                                               $txt = "RFC ";
-                                       } else {
-                                               $txt = $this->doMagicRFC( $tokenizer );
-                                       }
-                                       break;
-                               case "ISBN ":
-                                       if ( $tagIsOpen ) {
-                                               $txt = "ISBN ";
-                                       } else {
-                                               $txt = $this->doMagicISBN( $tokenizer );
-                                       }
-                                       break;
-                               case "<timeline>":
-                                       if ( $wgUseTimeline && 
-                                            "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
-                                       {
-                                               $txt = renderTimeline( $timelinesrc );
-                                       } else {
-                                               $txt=$token["text"];
-                                       }
-                                       break;
-                               default:
-                                       # Call language specific Hook.
-                                       $txt = $wgLang->processToken( $token, $tokenStack );
-                                       if ( NULL == $txt ) {
-                                               # An unkown token. Highlight.
-                                               $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
-                                               $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
-                                       }
-                                       break;
-                       }
-                       # If we're parsing the interior of a link, don't append the interior to $s,
-                       # but push it to the stack so it can be processed when a ]] token is found.
-                       if ( $tagIsOpen  && $txt != "" ) {
-                               $token["type"] = "text";
-                               $token["text"] = $txt;
-                               array_push( $tokenStack, $token );
-                       } else {
-                               $s .= $txt;
-                       }
-               } #end while
-               if ( count( $tokenStack ) != 0 )
-               {
-                       # still objects on stack. opened [[ tag without closing ]] tag.
-                       $txt = "";
-                       while ( $lastToken = array_pop( $tokenStack ) )
-                       {
-                               if ( $lastToken["type"] == "text" )
-                               {
-                                       $txt = $lastToken["text"] . $txt;
-                               } else {
-                                       $txt = $lastToken["type"] . $txt;
-                               }
-                       }
-                       $s .= $txt;
-               }
-               return $s;
-       }
-
-       /* private */ function handleInternalLink( $line, $prefix )
+       /* private */ function replaceInternalLinks( $s )
         {
                 global $wgLang, $wgLinkCache;
                 global $wgNamespacesWithSubpages, $wgLanguageCode;
-               static $fname = "Parser::handleInternalLink" ;
+               static $fname = "Parser::replaceInternalLink" ;
                 wfProfileIn( $fname );
  
                 wfProfileIn( "$fname-setup" );
                 static $tc = FALSE;
-               if ( !$tc ) { $tc = Title::legalChars() . "#"; }
+               # the % is needed to support urlencoded titles as well
+               if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
                 $sk =& $this->mOptions->getSkin();
  
+               $a = explode( "[[", " " . $s );
+               $s = array_shift( $a );
+               $s = substr( $s, 1 );
+
                 # Match a link having the form [[namespace:link|alternate]]trail
                 static $e1 = FALSE;
                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
@@ -956,92 +799,119 @@ class Parser
  
                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
  
+               if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
+                       $new_prefix = $m[2];
+                       $s = $m[1];
+               } else {
+                       $new_prefix="";
+               }
+
                 wfProfileOut( "$fname-setup" );
-               $s = "";
  
-               if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
-                       $text = $m[2];
-                       $trail = $m[3];
-               } else { # Invalid form; output directly
-                       $s .= $prefix . "[[" . $line ;
-                       return $s;
-               }
+               foreach ( $a as $line ) {
+                       $prefix = $new_prefix;
  
-               /* Valid link forms:
-               Foobar -- normal
-               :Foobar -- override special treatment of prefix (images, language links)
-               /Foobar -- convert to CurrentPage/Foobar
-               /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
-               */
-               $c = substr($m[1],0,1);
-               $noforce = ($c != ":");
-               if( $c == "/" ) { # subpage
-                       if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
-                               $m[1]=substr($m[1],1,strlen($m[1])-2);
-                               $noslash=$m[1];
-                       } else {
-                               $noslash=substr($m[1],1);
+                       if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
+                               $text = $m[2];
+                               # fix up urlencoded title texts
+                               if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
+                               $trail = $m[3];
+                       } else { # Invalid form; output directly
+                               $s .= $prefix . "[[" . $line ;
+                               wfProfileOut( $fname );
+                               continue;
                         }
-                       if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
-                               $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
-                               if( "" == $text ) {
-                                       $text= $m[1];
-                               } # this might be changed for ugliness reasons
+
+                       /* Valid link forms:
+                       Foobar -- normal
+                       :Foobar -- override special treatment of prefix (images, language links)
+                       /Foobar -- convert to CurrentPage/Foobar
+                       /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
+                       */
+                       $c = substr($m[1],0,1);
+                       $noforce = ($c != ":");
+                       if( $c == "/" ) { # subpage
+                               if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
+                                       $m[1]=substr($m[1],1,strlen($m[1])-2);
+                                       $noslash=$m[1];
+                               } else {
+                                       $noslash=substr($m[1],1);
+                               }
+                               if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
+                                       $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
+                                       if( "" == $text ) {
+                                               $text= $m[1];
+                                       } # this might be changed for ugliness reasons
+                               } else {
+                                       $link = $noslash; # no subpage allowed, use standard link
+                               }
+                       } elseif( $noforce ) { # no subpage
+                               $link = $m[1];
                         } else {
-                               $link = $noslash; # no subpage allowed, use standard link
+                               $link = substr( $m[1], 1 );
                         }
-               } elseif( $noforce ) { # no subpage
-                       $link = $m[1];
-               } else {
-                       $link = substr( $m[1], 1 );
-               }
-               if( "" == $text )
+                       $wasblank = ( "" == $text );
+                       if( $wasblank )
                         $text = $link;
  
-               $nt = Title::newFromText( $link );
-               if( !$nt ) {
-                       $s .= $prefix . "[[" . $line;
-                       return $s;
-               }
-               $ns = $nt->getNamespace();
-               $iw = $nt->getInterWiki();
-               if( $noforce ) {
-                       if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
-                               array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
-                               $s .= $prefix . $trail ;
-                               return (trim($s) == '')? '': $s;
-                       }
-                       if( $ns == $image ) {
-                               $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
-                               $wgLinkCache->addImageLinkObj( $nt );
-                               return $s;
+                       $nt = Title::newFromText( $link );
+                       if( !$nt ) {
+                               $s .= $prefix . "[[" . $line;
+                               wfProfileOut( $fname );
+                               continue;
                         }
-                       if ( $ns == $category ) {
-                               $t = $nt->getText() ;
-                               $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
-                               $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
-                               $this->mOutput->mCategoryLinks[] = $t ;
-                               $s .= $prefix . $trail ;
-                               return $s ;
+                       $ns = $nt->getNamespace();
+                       $iw = $nt->getInterWiki();
+                       if( $noforce ) {
+                               if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
+                                       array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
+                                       $s .= $prefix . $trail ;
+                                       wfProfileOut( $fname );
+                                       $s .= (trim($s) == '')? '': $s;
+                                       continue;
+                               }
+                               if ( $ns == $image ) {
+                                       $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
+                                       $wgLinkCache->addImageLinkObj( $nt );
+                                       wfProfileOut( $fname );
+                                       continue;
+                               }
+                               if ( $ns == $category ) {
+                                       $t = $nt->getText() ;
+                                       $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+
+                                       $wgLinkCache->suspend(); # Don't save in links/brokenlinks
+                                       $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+                                       $wgLinkCache->resume();
+
+                                       $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
+                                       $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
+                                       $this->mOutput->mCategoryLinks[] = $t ;
+                                       $s .= $prefix . $trail ;
+                                       wfProfileOut( $fname );
+                                       continue;
+                               }
+                       }
+                       if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
+                       ( strpos( $link, "#" ) == FALSE ) ) {
+                               # Self-links are handled specially; generally de-link and change to bold.
+                               $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
+                               wfProfileOut( $fname );
+                               continue;
                         }
-               }
-               if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
-                   ( strpos( $link, "#" ) == FALSE ) ) {
-                       # Self-links are handled specially; generally de-link and change to bold.
-                       $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
-                       return $s;
-               }
  
-               if( $ns == $media ) {
-                       $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
-                       $wgLinkCache->addImageLinkObj( $nt );
-                       return $s;
-               } elseif( $ns == $special ) {
-                       $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
-                       return $s;
+                       if( $ns == $media ) {
+                               $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
+                               $wgLinkCache->addImageLinkObj( $nt );
+                               wfProfileOut( $fname );
+                               continue;
+                       } elseif( $ns == $special ) {
+                               $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
+                               wfProfileOut( $fname );
+                               continue;
+                       }
+                       $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
                 }
-               $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
-
                 wfProfileOut( $fname );
                 return $s;
         }
@@ -1214,10 +1084,10 @@ class Parser
                                 # No prefix (not in list)--go to paragraph mode
                                 $uniq_prefix = UNIQ_PREFIX;
                                 // XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
+                               $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
                                 $closematch = preg_match(
                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
-                                       "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
+                                       "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
                                 if ( $openmatch or $closematch ) {
                                         $paragraphStack = false;
                                         $output .= $this->closeParagraph();
@@ -1229,8 +1099,8 @@ class Parser
                                         } else {
                                                 $inBlockElem = true;
                                         }
-                               } else if ( !$inBlockElem ) {
-                                       if ( " " == $t{0} ) {
+                               } else if ( !$inBlockElem && !$this->mInPre ) {
+                                       if ( " " == $t{0} and trim($t) != '' ) {
                                                 // pre
                                                 if ($this->mLastSection != 'pre') {
                                                         $paragraphStack = false;
@@ -1339,17 +1209,24 @@ class Parser
                         $this->initialiseVariables();
                 }
                 $titleChars = Title::legalChars();
-               $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
  
                 # This function is called recursively. To keep track of arguments we need a stack:
                 array_push( $this->mArgStack, $args );
  
                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
                 $GLOBALS['wgCurParser'] =& $this;
+
+               # Argument substitution
+               if ( $this->mOutputType == OT_HTML ) {
+                       $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
+               }
+               # Double brace substitution
+               $regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
                 $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
  
                 array_pop( $this->mArgStack );
  
+               wfProfileOut( $fname );
                 return $text;
         }
  
@@ -1359,6 +1236,8 @@ class Parser
                 $fname = "Parser::braceSubstitution";
                 $found = false;
                 $nowiki = false;
+               $noparse = false;
+               
                 $title = NULL;
  
                 # $newline is an optional newline character before the braces
@@ -1374,20 +1253,30 @@ class Parser
                         $args = array();
                 }
                 $argc = count( $args );
+       
+               # {{{}}}
+               if ( strpos( $matches[0], "{{{" ) !== false ) {
+                       $text = $matches[0];
+                       $found = true;
+                       $noparse = true;
+               }
  
                 # SUBST
-               $mwSubst =& MagicWord::get( MAG_SUBST );
-               if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
-                       if ( $this->mOutputType != OT_WIKI ) {
-                               # Invalid SUBST not replaced at PST time
-                               # Return without further processing
+               if ( !$found ) {
+                       $mwSubst =& MagicWord::get( MAG_SUBST );
+                       if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+                               if ( $this->mOutputType != OT_WIKI ) {
+                                       # Invalid SUBST not replaced at PST time
+                                       # Return without further processing
+                                       $text = $matches[0];
+                                       $found = true;
+                                       $noparse= true;
+                               }
+                       } elseif ( $this->mOutputType == OT_WIKI ) {
+                               # SUBST not found in PST pass, do nothing
                                 $text = $matches[0];
                                 $found = true;
                         }
-               } elseif ( $this->mOutputType == OT_WIKI ) {
-                       # SUBST not found in PST pass, do nothing
-                       $text = $matches[0];
-                       $found = true;
                 }
  
                 # MSG, MSGNW and INT
@@ -1462,14 +1351,14 @@ class Parser
                         $found = true;
                         $this->mOutput->mContainsOldMagic = true;
                 }
-
+/*
                 # Arguments input from the caller
                 $inputArgs = end( $this->mArgStack );
                 if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
                         $text = $inputArgs[$part1];
                         $found = true;
                 }
-
+*/
                 # Load from database
                 if ( !$found ) {
                         $title = Title::newFromText( $part1, NS_TEMPLATE );
@@ -1498,7 +1387,7 @@ class Parser
                 # Only for HTML output
                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                         $text = wfEscapeWikiText( $text );
-               } elseif ( $this->mOutputType == OT_HTML && $found ) {
+               } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
                         # Clean up argument array
                         $assocArgs = array();
                         $index = 1;
@@ -1524,12 +1413,7 @@ class Parser
                         }
  
                         # Run full parser on the included text
-                       $text = $this->strip( $text, $this->mStripState );
-                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs, false );
-
-                       # Add the result to the strip state for re-inclusion after
-                       # the rest of the processing
-                       $text = $this->insertStripItem( $text, $this->mStripState );
+                       $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
  
                         # Resume the link cache and register the inclusion as a link
                         if ( !is_null( $title ) ) {
@@ -1545,6 +1429,21 @@ class Parser
                 }
         }
  
+       # Triple brace replacement -- used for template arguments.
+       # Callback target for the {{{name}}} pattern (reached via wfArgSubstitution):
+       # $matches[1] is the optional newline before the braces, $matches[2] the
+       # argument name. A name present in the topmost entry of the argument stack
+       # is replaced by its parsed value; anything else is returned untouched.
+       function argSubstitution( $matches )
+       {
+               $currentArgs = end( $this->mArgStack );
+               $name = trim( $matches[2] );
+
+               if ( !array_key_exists( $name, $currentArgs ) ) {
+                       # Unknown argument: leave the original text in place
+                       return $matches[0];
+               }
+
+               return $this->stripParse( $currentArgs[$name], (bool)$matches[1], array() );
+       }
+
         # Returns true if the function is allowed to include this entity
         function incrementIncludeCount( $dbk )
         {
@@ -1795,6 +1694,8 @@ class Parser
                         $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                         $tocline = trim( $canonized_headline );
                         $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+                       # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
+                       $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
                         $refer[$headlineCount] = $canonized_headline;
  
                         # count how many in assoc. array so we can track dupes in anchors
@@ -1874,25 +1775,16 @@ class Parser
                 return $full;
         }
  
-       /* private */ function doMagicISBN( &$tokenizer )
+       /* private */ function magicISBN( $text )
         {
                 global $wgLang;
  
-               # Check whether next token is a text token
-               # If yes, fetch it and convert the text into a
-               # Special::BookSources link
-               $token = $tokenizer->previewToken();
-               while ( $token["type"] == "" )
-               {
-                       $tokenizer->nextToken();
-                       $token = $tokenizer->previewToken();
-               }
-               if ( $token["type"] == "text" )
-               {
-                       $token = $tokenizer->nextToken();
-                       $x = $token["text"];
-                       $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+               $a = split( "ISBN ", " $text" );
+               if ( count ( $a ) < 2 ) return $text;
+               $text = substr( array_shift( $a ), 1);
+               $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  
+               foreach ( $a as $x ) {
                         $isbn = $blank = "" ;
                         while ( " " == $x{0} ) {
                                 $blank .= " ";
@@ -1906,38 +1798,27 @@ class Parser
                         $num = str_replace( " ", "", $num );
  
                         if ( "" == $num ) {
-                               $text = "ISBN $blank$x";
+                               $text .= "ISBN $blank$x";
                         } else {
                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
-                               $text = "<a href=\"" .
+                               $text .= "<a href=\"" .
                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                         "\" class=\"internal\">ISBN $isbn</a>";
                                 $text .= $x;
                         }
-               } else {
-                       $text = "ISBN ";
                 }
                 return $text;
         }
-       /* private */ function doMagicRFC( &$tokenizer )
+       /* private */ function magicRFC( $text )
         {
                 global $wgLang;
  
-               # Check whether next token is a text token
-               # If yes, fetch it and convert the text into a
-               # link to an RFC source
-               $token = $tokenizer->previewToken();
-               while ( $token["type"] == "" )
-               {
-                       $tokenizer->nextToken();
-                       $token = $tokenizer->previewToken();
-               }
-               if ( $token["type"] == "text" )
-               {
-                       $token = $tokenizer->nextToken();
-                       $x = $token["text"];
-                       $valid = "0123456789";
+               $a = split( "RFC ", " $text" );
+               if ( count ( $a ) < 2 ) return $text;
+               $text = substr( array_shift( $a ), 1);
+               $valid = "0123456789";
  
+               foreach ( $a as $x ) {
                         $rfc = $blank = "" ;
                         while ( " " == $x{0} ) {
                                 $blank .= " ";
@@ -1955,10 +1836,8 @@ class Parser
                                 $url = str_replace( "$1", $rfc, $url);
                                 $sk =& $this->mOptions->getSkin();
                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
-                               $text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
+                               $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
                         }
-               } else {
-                       $text = "RFC ";
                 }
                 return $text;
         }
@@ -2097,6 +1976,7 @@ class Parser
  class ParserOutput
  {
         var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
+       var $mTouched; # Used for caching
  
         function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                 $containsOldMagic = false )
@@ -2105,16 +1985,19 @@ class ParserOutput
                 $this->mLanguageLinks = $languageLinks;
                 $this->mCategoryLinks = $categoryLinks;
                 $this->mContainsOldMagic = $containsOldMagic;
+               $this->mTouched = "";
         }
  
         function getText() { return $this->mText; }
         function getLanguageLinks() { return $this->mLanguageLinks; }
         function getCategoryLinks() { return $this->mCategoryLinks; }
+       function getTouched() { return $this->mTouched; }
         function containsOldMagic() { return $this->mContainsOldMagic; }
         function setText( $text ) { return wfSetVar( $this->mText, $text ); }
         function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
         function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
         function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
+       function setTouched( $t ) { return wfSetVar( $this->mTouched, $t ); }
  
         function merge( $other ) {
                 $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
@@ -2201,7 +2084,167 @@ class ParserOptions
  function wfBraceSubstitution( $matches )
  {
         global $wgCurParser;
+       $titleChars = Title::legalChars();
+       # Callback for the double-brace regex: expands inner includes first, then defers to the current parser
+       # Not really nested stuff, just multiple includes separated by title characters: expand each in turn and return the joined string
+       if(preg_match("/^([^}{]*)}}([^}{]*{{)(.*)$/s", $matches[2], $m)) {
+               $text = wfInternalBraceSubstitution( $m[1] );
+               $string = $text.$m[2].$m[3];
+               while(preg_match("/^([^}{]*){{([$titleChars]*?)(}}[^}{]*{{.*)?$/s", $string, $m)) {
+                       $text = wfInternalBraceSubstitution( $m[2] );
+                       $trail = !empty($m[3])? preg_replace("/^}}/", '', $m[3]):'';
+                       $string = $m[1].$text.$trail;
+               }
+               return $string;
+       }
+       # NOTE(review): the while loop above has no iteration cap (the one below stops after 30 passes) -- confirm it always terminates
+       # Double brace substitution, expand bar in {{foo{{bar}}}}; the result is patched into both the full match and the title part
+       $i = 0;
+       while(preg_match("/{{([$titleChars]*?)}}/", $matches[2], $internalmatches) and $i < 30) {
+               $text = wfInternalBraceSubstitution( $internalmatches[1] );
+               $matches[0] = str_replace($internalmatches[0], $text , $matches[0]);
+               $matches[2] = str_replace($internalmatches[0], $text , $matches[2]);
+               $i++;
+       }
+       # The match, with any inner includes already replaced, is expanded by the parser itself
         return $wgCurParser->braceSubstitution( $matches );
  }
  
+function wfArgSubstitution( $matches )
+{
+       # Plain-function entry point for preg_replace_callback: forwards the
+       # match to the parser currently registered in the wgCurParser global.
+       return $GLOBALS['wgCurParser']->argSubstitution( $matches );
+}
+
+# Expands a single inner include on behalf of wfBraceSubstitution().
+#
+# $part1 is the text found between "{{" and "}}". The SUBST, MSGNW, MSG, INT,
+# NS, LOCALURL and LOCALURLE magic words are tried against its start in that
+# order; what is left is looked up as a parser variable and finally as a page
+# in NS_TEMPLATE. The first step that produces text wins.
+#
+# Returns the replacement text, or an empty string when nothing is expanded.
+#
+# XXX: i don't think this is the most elegant way to do it..
+function wfInternalBraceSubstitution( $part1 ) {
+       global $wgLang, $wgCurParser;
+       $found = false;
+       $title = NULL;
+
+       # Result for an include that is left unexpanded. This function used to
+       # read $matches[0] in those places, but it has no $matches variable, so
+       # the value was always NULL (plus an undefined-variable notice). The
+       # empty result is kept on purpose: wfBraceSubstitution() re-scans the
+       # returned text for "{{" in a loop without an iteration cap, so handing
+       # the original braces back from here alone would make that loop spin.
+       # NOTE(review): as a consequence an inner include seen during the
+       # OT_WIKI pass is replaced by nothing; restoring it needs a matching
+       # change in wfBraceSubstitution().
+       $unexpanded = '';
+
+       # SUBST
+       $mwSubst =& MagicWord::get( MAG_SUBST );
+       if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+               if ( $wgCurParser->mOutputType != OT_WIKI ) {
+                       # Invalid SUBST not replaced at PST time
+                       # Return without further processing
+                       $text = $unexpanded;
+                       $found = true;
+               }
+       } elseif ( $wgCurParser->mOutputType == OT_WIKI ) {
+               # SUBST not found in PST pass, do nothing
+               $text = $unexpanded;
+               $found = true;
+       }
+
+       # MSG, MSGNW and INT
+       if ( !$found ) {
+               # Check for MSGNW: the prefix is stripped from $part1, but this
+               # function applies no nowiki escaping to the result
+               $mwMsgnw =& MagicWord::get( MAG_MSGNW );
+               if ( !$mwMsgnw->matchStartAndRemove( $part1 ) ) {
+                       # Remove obsolete MSG:
+                       $mwMsg =& MagicWord::get( MAG_MSG );
+                       $mwMsg->matchStartAndRemove( $part1 );
+               }
+
+               # Check if it is an internal message
+               $mwInt =& MagicWord::get( MAG_INT );
+               if ( $mwInt->matchStartAndRemove( $part1 ) ) {
+                       if ( $wgCurParser->incrementIncludeCount( "int:$part1" ) ) {
+                               $text = wfMsgReal( $part1, array(), true );
+                               $found = true;
+                       }
+               }
+       }
+
+       # NS
+       if ( !$found ) {
+               # Check for NS: (namespace expansion)
+               $mwNs = MagicWord::get( MAG_NS );
+               if ( $mwNs->matchStartAndRemove( $part1 ) ) {
+                       if ( intval( $part1 ) ) {
+                               # Numeric namespace index
+                               $text = $wgLang->getNsText( intval( $part1 ) );
+                               $found = true;
+                       } else {
+                               # Canonical namespace name, matched in lower case
+                               $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
+                               if ( !is_null( $index ) ) {
+                                       $text = $wgLang->getNsText( $index );
+                                       $found = true;
+                               }
+                       }
+               }
+       }
+
+       # LOCALURL and LOCALURLE
+       if ( !$found ) {
+               $mwLocal = MagicWord::get( MAG_LOCALURL );
+               $mwLocalE = MagicWord::get( MAG_LOCALURLE );
+
+               # Pick the Title method that matches the magic word, if any
+               if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
+                       $func = 'getLocalURL';
+               } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
+                       $func = 'escapeLocalURL';
+               } else {
+                       $func = '';
+               }
+
+               if ( $func !== '' ) {
+                       $title = Title::newFromText( $part1 );
+                       if ( !is_null( $title ) ) {
+                               $text = $title->$func();
+                               $found = true;
+                       }
+               }
+       }
+
+       # Internal variables
+       if ( !$found && array_key_exists( $part1, $wgCurParser->mVariables ) ) {
+               $text = $wgCurParser->mVariables[$part1];
+               $found = true;
+               $wgCurParser->mOutput->mContainsOldMagic = true;
+       }
+
+       # Load from database
+       if ( !$found ) {
+               $title = Title::newFromText( $part1, NS_TEMPLATE );
+               if ( !is_null( $title ) && !$title->isExternal() ) {
+                       # Check for excessive inclusion
+                       $dbk = $title->getPrefixedDBkey();
+                       if ( $wgCurParser->incrementIncludeCount( $dbk ) ) {
+                               $article = new Article( $title );
+                               $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
+                               if ( $articleContent !== false ) {
+                                       $found = true;
+                                       $text = $articleContent;
+                               }
+                       }
+
+                       # If the title is valid but undisplayable, make a link to it
+                       if ( $wgCurParser->mOutputType == OT_HTML && !$found ) {
+                               $text = "[[" . $title->getPrefixedText() . "]]";
+                               $found = true;
+                       }
+               }
+       }
+
+       return $found ? $text : $unexpanded;
+}
  ?>