Changing template argument syntax from {{arg}} to {{{arg}}}
[lhc/web/wiklou.git] / includes / Parser.php
index 6613832..09026de 100644 (file)
@@ -1,9 +1,12 @@
 <?php
 
-include_once('Tokenizer.php');
+require_once('Tokenizer.php');
 
 if( $GLOBALS['wgUseWikiHiero'] ){
-       include_once('wikihiero.php');
+       require_once('extensions/wikihiero/wikihiero.php');
+}
+if( $GLOBALS['wgUseTimeline'] ){
+       require_once('extensions/timeline/Timeline.php');
 }
 
 # PHP Parser
@@ -41,6 +44,12 @@ define( "OT_HTML", 1 );
 define( "OT_WIKI", 2 );
 define( "OT_MSG", 3 );
 
+# Special $tag value for extractTags: when passed, the function
+# extracts <!-- HTML comments --> instead of a regular
+# <XML>-style tag pair. This should not be anything we
+# may want to use in wikisyntax
+define( "STRIP_COMMENTS", "HTMLCommentStrip" );
+
 # prefix for escaping, used in two functions at least
 define( "UNIQ_PREFIX", "NaodW29");
 
@@ -77,6 +86,7 @@ class Parser
        #
        function parse( $text, &$title, $options, $linestart = true, $clearState = true )
        {
+               global $wgUseTidy;
                $fname = "Parser::parse";
                wfProfileIn( $fname );
 
@@ -93,20 +103,29 @@ class Parser
                $text = $this->internalParse( $text, $linestart );
                $text = $this->unstrip( $text, $this->mStripState );
                # Clean up special characters, only run once, next-to-last before doBlockLevels
-               $fixtags = array(
-                       "/<hr *>/i" => '<hr/>',
-                       "/<br *>/i" => '<br/>',
-                       "/<center *>/i"=>'<div class="center">',
-                       "/<\\/center *>/i" => '</div>',
-                       # Clean up spare ampersands; note that we probably ought to be
-                       # more careful about named entities.
-                       '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
-               );
-               $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
-
+               if(!$wgUseTidy) {
+                       $fixtags = array(
+                               "/<hr *>/i" => '<hr/>',
+                               "/<br *>/i" => '<br/>',
+                               "/<center *>/i"=>'<div class="center">',
+                               "/<\\/center *>/i" => '</div>',
+                               # Clean up spare ampersands; note that we probably ought to be
+                               # more careful about named entities.
+                               '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
+                       );
+                       $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+               } else {
+                       $fixtags = array(
+                               "/<center *>/i"=>'<div class="center">',
+                               "/<\\/center *>/i" => '</div>'
+                       );
+                       $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
+               }
                # only once and last
                $text = $this->doBlockLevels( $text, $linestart );
-
+               if($wgUseTidy) {
+                       $text = $this->tidy($text);
+               }
                $this->mOutput->setText( $text );
                wfProfileOut( $fname );
                return $this->mOutput;
@@ -124,6 +143,9 @@ class Parser
 
        # If $content is already set, the additional entries will be appended
 
+       # If $tag is set to STRIP_COMMENTS, the function will extract
+       # <!-- HTML comments -->
+
        /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
                $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
                if ( !$content ) {
@@ -133,12 +155,20 @@ class Parser
                $stripped = "";
 
                while ( "" != $text ) {
-                       $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       if($tag==STRIP_COMMENTS) {
+                               $p = preg_split( "/<!--/i", $text, 2 );
+                       } else {
+                               $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
+                       }
                        $stripped .= $p[0];
                        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
                                $text = "";
                        } else {
-                               $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               if($tag==STRIP_COMMENTS) {
+                                       $q = preg_split( "/-->/i", $p[1], 2 );
+                               } else {
+                                       $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
+                               }
                                $marker = $rnd . sprintf("%08X", $n++);
                                $content[$marker] = $q[0];
                                $stripped .= $marker;
@@ -148,18 +178,23 @@ class Parser
                return $stripped;
        }
 
-       # Strips <nowiki>, <pre> and <math>
+       # Strips and renders <nowiki>, <pre>, <math>, <hiero>
+       # If $render is set, performs necessary rendering operations on plugins
        # Returns the text, and fills an array with data needed in unstrip()
        # If the $state is already a valid strip state, it adds to the state
-       #
-       function strip( $text, &$state )
+
+       # When $stripcomments is set, HTML comments <!-- like this -->
+       # will be stripped in addition to other tags. This is important
+       # for section editing, where these comments cause confusion when
+       # counting the sections in the wikisource
+       function strip( $text, &$state, $stripcomments = false )
        {
                $render = ($this->mOutputType == OT_HTML);
                $nowiki_content = array();
                $hiero_content = array();
                $math_content = array();
                $pre_content = array();
-               $item_content = array();
+               $comment_content = array();
 
                # Replace any instances of the placeholders
                $uniq_prefix = UNIQ_PREFIX;
@@ -174,25 +209,25 @@ class Parser
                        }
                }
 
-               if( $GLOBALS['wgUseWikiHiero'] ){
-                       $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
-                       foreach( $hiero_content as $marker => $content ){
-                               if( $render ){
-                                       $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
-                               } else {
-                                       $hiero_content[$marker] = "<hiero>$content</hiero>";
-                               }
+               $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+               foreach( $hiero_content as $marker => $content ){
+                       if( $render && $GLOBALS['wgUseWikiHiero']){
+                               $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+                       } else {
+                               $hiero_content[$marker] = "<hiero>$content</hiero>";
                        }
                }
 
-               if( $this->mOptions->getUseTeX() ){
-                       $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
-                       foreach( $math_content as $marker => $content ){
-                               if( $render ){
+               $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+               foreach( $math_content as $marker => $content ){
+                       if( $render ) {
+                               if( $this->mOptions->getUseTeX() ) {
                                        $math_content[$marker] = renderMath( $content );
                                } else {
-                                       $math_content[$marker] = "<math>$content</math>";
+                                       $math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;"; # TeX off: show the tag pair escaped
                                }
+                       } else {
+                               $math_content[$marker] = "<math>$content</math>";
                        }
                }
 
@@ -204,6 +239,12 @@ class Parser
                                $pre_content[$marker] = "<pre>$content</pre>";
                        }
                }
+               if($stripcomments) {
+                       $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
+                       foreach( $comment_content as $marker => $content ){
+                               $comment_content[$marker] = "<!--$content-->";
+                       }
+               }
 
                # Merge state with the pre-existing state, if there is one
                if ( $state ) {
@@ -211,13 +252,14 @@ class Parser
                        $state['hiero'] = $state['hiero'] + $hiero_content;
                        $state['math'] = $state['math'] + $math_content;
                        $state['pre'] = $state['pre'] + $pre_content;
+                       $state['comment'] = $state['comment'] + $comment_content;
                } else {
                        $state = array(
                          'nowiki' => $nowiki_content,
                          'hiero' => $hiero_content,
                          'math' => $math_content,
                          'pre' => $pre_content,
-                         'item' => $item_content
+                         'comment' => $comment_content
                        );
                }
                return $text;
@@ -248,8 +290,7 @@ class Parser
                          'nowiki' => array(),
                          'hiero' => array(),
                          'math' => array(),
-                         'pre' => array(),
-                         'item' => array()
+                         'pre' => array()
                        );
                }
                $state['item'][$rnd] = $text;
@@ -275,18 +316,11 @@ class Parser
                $data = array () ;
                $id = $this->mTitle->getArticleID() ;
 
-               # For existing categories
-               if( $id ) {
-                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id";
-                       $res = wfQuery ( $sql, DB_READ ) ;
-                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-               } else {
-                       # For non-existing categories
-                       $t = wfStrencode( $this->mTitle->getPrefixedDBKey() );
-                       $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ;
-                       $res = wfQuery ( $sql, DB_READ ) ;
-                       while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
-               }
+               # FIXME: add limits
+               $t = wfStrencode( $this->mTitle->getDBKey() );
+               $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
+               $res = wfQuery ( $sql, DB_READ ) ;
+               while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 
                # For all pages that link to this category
                foreach ( $data AS $x )
@@ -304,18 +338,14 @@ class Parser
                wfFreeResult ( $res ) ;
 
                # Showing subcategories
-               if ( count ( $children ) > 0 )
-               {
-                       asort ( $children ) ;
+               if ( count ( $children ) > 0 ) {
                        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
                        $r .= implode ( ", " , $children ) ;
                }
 
                # Showing pages in this category
-               if ( count ( $articles ) > 0 )
-               {
+               if ( count ( $articles ) > 0 ) {
                        $ti = $this->mTitle->getText() ;
-                       asort ( $articles ) ;
                        $h =  wfMsg( "category_header", $ti );
                        $r .= "<h2>{$h}</h2>\n" ;
                        $r .= implode ( ", " , $articles ) ;
@@ -365,6 +395,47 @@ class Parser
                return trim ( $t ) ;
        }
 
+       # Interface with HTML Tidy, used if $wgUseTidy = true: wraps $text in a minimal XHTML page and
+       # pipes it through $wgTidyBin. Returns tidy's stdout, or an escaped dump of the page if that is empty.
+       function tidy ( $text ) {
+               global $wgTidyConf, $wgTidyBin, $wgTidyOpts, $wgInputEncoding, $wgOutputEncoding;
+               $cleansource = '';
+               # Work on a copy: appending to the global would pile up one more flag per call
+               $opts = $wgTidyOpts;
+               switch(strtoupper($wgOutputEncoding)) {
+                       case 'ISO-8859-1':
+                               $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
+                               break;
+                       case 'UTF-8':
+                               $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
+                               break;
+                       default:
+                               $opts .= ' -raw';
+               }
+               $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
+               $descriptorspec = array(
+                       0 => array("pipe", "r"),
+                       1 => array("pipe", "w"),
+                       2 => array("file", "/dev/null", "a") # tidy's diagnostics are discarded
+               );
+               $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
+               if (is_resource($process)) {
+                       fwrite($pipes[0], $text);
+                       fclose($pipes[0]); # close stdin before reading the result back
+                       while (!feof($pipes[1])) {
+                               $cleansource .= fgets($pipes[1], 1024);
+                       }
+                       fclose($pipes[1]);
+                       proc_close($process);
+               }
+               if( $cleansource == '' && $text != '') {
+                       return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
+               }
+               return $cleansource;
+       }
+
        function doTableStuff ( $t )
        {
                $t = explode ( "\n" , $t ) ;
@@ -465,7 +536,19 @@ class Parser
                return $t ;
        }
 
-       function internalParse( $text, $linestart, $args = array() )
+       # Strips $text, runs it through internalParse() as a non-main pass and
+       # stores the parsed result in $this->mStripState via insertStripItem().
+       # A newline is put in front of the stored text when $linestart is set.
+       # Returns the value insertStripItem() produces for the stored text.
+       function stripParse( $text, $linestart, $args )
+       {
+               $stripped = $this->strip( $text, $this->mStripState );
+               $parsed = $this->internalParse( $stripped, $linestart, $args, false );
+               $leadIn = $linestart ? "\n" : '';
+               return $this->insertStripItem( $leadIn . $parsed, $this->mStripState );
+       }
+       
+       function internalParse( $text, $linestart, $args = array(), $isMain=true )
        {
                $fname = "Parser::internalParse";
                wfProfileIn( $fname );
@@ -483,14 +566,14 @@ class Parser
                $text = $this->replaceExternalLinks( $text );
                $text = $this->doTokenizedParser ( $text );
                $text = $this->doTableStuff ( $text ) ;
-               $text = $this->formatHeadings( $text );
+               $text = $this->formatHeadings( $text, $isMain );
                $sk =& $this->mOptions->getSkin();
                $text = $sk->transformContent( $text );
 
                if ( !isset ( $this->categoryMagicDone ) ) {
-                  $text .= $this->categoryMagic () ;
-                  $this->categoryMagicDone = true ;
-                  }
+                       $text .= $this->categoryMagic () ;
+                       $this->categoryMagicDone = true ;
+               }
 
                wfProfileOut( $fname );
                return $text;
@@ -593,6 +676,21 @@ class Parser
                return $s;
        }
 
+       # Handles a run of four apostrophes as bold markup plus one literal apostrophe.
+       # If bold is already open, the apostrophe goes after the markup:
+       #   '''Caesar''''s army  => <strong>Caesar</strong>'s army
+       # otherwise it goes in front of the markup:
+       #   ''''Caesar'''' was a roman emperor => '<strong>Caesar</strong>' was a roman emperor
+       # These are assumptions and may be wrong, but so might any other reading.
+       /* private */ function handle4Quotes( &$state, $token )
+       {
+               # Sample the flag first: handle3Quotes() updates $state["strong"]
+               $boldWasOpen = ( $state["strong"] !== false );
+               $markup = $this->handle3Quotes( $state, $token );
+               return $boldWasOpen ? $markup . "'" : "'" . $markup;
+       }
+
+
        /* private */ function handle3Quotes( &$state, $token )
        {
                if ( $state["strong"] !== false ) {
@@ -606,7 +704,7 @@ class Parser
                        $state["strong"] = FALSE;
                } else {
                        $s = "<strong>";
-                       $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["strong"] = $token["pos"];
                }
                return $s;
        }
@@ -624,7 +722,7 @@ class Parser
                        $state["em"] = FALSE;
                } else {
                        $s = "<em>";
-                       $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["em"] = $token["pos"];
 
                }
                return $s;
@@ -650,7 +748,7 @@ class Parser
                        $state["em"] = $token["pos"];
                } else { # not $em and not $strong
                        $s .= "<strong><em>";
-                       $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["strong"] = $state["em"] = $token["pos"];
                }
                return $s;
        }
@@ -658,6 +756,7 @@ class Parser
        /* private */ function doTokenizedParser( $str )
        {
                global $wgLang; # for language specific parser hook
+               global $wgUploadDirectory, $wgUseTimeline;
 
                $tokenizer=Tokenizer::newFromString( $str );
                $tokenStack = array();
@@ -747,7 +846,7 @@ class Parser
                                        $txt = "\n<hr />\n";
                                        break;
                                case "'''":
-                                       # This and the three next ones handle quotes
+                                       # This and the four next ones handle quotes
                                        $txt = $this->handle3Quotes( $state, $token );
                                        break;
                                case "''":
@@ -756,10 +855,26 @@ class Parser
                                case "'''''":
                                        $txt = $this->handle5Quotes( $state, $token );
                                        break;
+                               case "''''":
+                                       $txt = $this->handle4Quotes( $state, $token );
+                                       break;
                                case "":
                                        # empty token
                                        $txt="";
                                        break;
+                               case "h": 
+                                       #heading- used to close all unbalanced bold or em tags in this section
+                                       $txt = '';
+                                       if( $state['em'] !== false and 
+                                       ( $state['strong'] === false or $state['em'] > $state['strong'] ) )
+                                       { 
+                                               $s .= '</em>';
+                                               $state['em'] = false;
+                                       }
+                                       if ( $state['strong'] !== false ) $txt .= '</strong>';
+                                       if ( $state['em'] !== false ) $txt .= '</em>';
+                                       $state['strong'] = $state['em'] = false;
+                                       break;
                                case "RFC ":
                                        if ( $tagIsOpen ) {
                                                $txt = "RFC ";
@@ -774,6 +889,15 @@ class Parser
                                                $txt = $this->doMagicISBN( $tokenizer );
                                        }
                                        break;
+                               case "<timeline>":
+                                       if ( $wgUseTimeline && 
+                                            "" != ( $timelinesrc = $tokenizer->readAllUntil("&lt;/timeline&gt;") ) )
+                                       {
+                                               $txt = renderTimeline( $timelinesrc );
+                                       } else {
+                                               $txt=$token["text"];
+                                       }
+                                       break;
                                default:
                                        # Call language specific Hook.
                                        $txt = $wgLang->processToken( $token, $tokenStack );
@@ -794,6 +918,19 @@ class Parser
                                $s .= $txt;
                        }
                } #end while
+
+               # make 100% sure all strong and em tags are closed
+               # doBlockLevels often messes the last bit up though, but invalid nesting is better than unclosed tags
+               # tidy solves this though
+               if( $state['em'] !== false and 
+               ( $state['strong'] === false or $state['em'] > $state['strong'] ) )
+               { 
+                       $s .= '</em>';
+                       $state['em'] = false;
+               }
+               if ( $state['strong'] !== false ) $s .= '</strong>';
+               if ( $state['em'] !== false ) $s .= '</em>';
+
                if ( count( $tokenStack ) != 0 )
                {
                        # still objects on stack. opened [[ tag without closing ]] tag.
@@ -842,7 +979,7 @@ class Parser
                if ( !$image ) { $image = Namespace::getImage(); }
                if ( !$special ) { $special = Namespace::getSpecial(); }
                if ( !$media ) { $media = Namespace::getMedia(); }
-               if ( !$category ) { $category = Namespace::getCategory(); }
+               if ( !$category ) { $category = Namespace::getCategory(); }
 
                $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 
@@ -872,7 +1009,7 @@ class Parser
                        } else {
                                $noslash=substr($m[1],1);
                        }
-                       if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here
+                       if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
                                $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                                if( "" == $text ) {
                                        $text= $m[1];
@@ -885,7 +1022,8 @@ class Parser
                } else {
                        $link = substr( $m[1], 1 );
                }
-               if( "" == $text )
+               $wasblank = ( "" == $text );
+               if( $wasblank )
                        $text = $link;
 
                $nt = Title::newFromText( $link );
@@ -898,6 +1036,7 @@ class Parser
                if( $noforce ) {
                        if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                                array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
+                               $s .= $prefix . $trail ;
                                return (trim($s) == '')? '': $s;
                        }
                        if( $ns == $image ) {
@@ -908,7 +1047,13 @@ class Parser
                        if ( $ns == $category ) {
                                $t = $nt->getText() ;
                                $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
+                               
+                               $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                                $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
+                               $wgLinkCache->resume();
+                               
+                               $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
+                               $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                                $this->mOutput->mCategoryLinks[] = $t ;
                                $s .= $prefix . $trail ;
                                return $s ;
@@ -1013,90 +1158,103 @@ class Parser
                return $text."\n";
        }
 
-       /* private */ function doBlockLevels( $text, $linestart )
-       {
+       /* private */ function doBlockLevels( $text, $linestart ) {
                $fname = "Parser::doBlockLevels";
                wfProfileIn( $fname );
+               
                # Parsing through the text line by line.  The main thing
                # happening here is handling of block-level elements p, pre,
                # and making lists from lines starting with * # : etc.
                #
-               $a = explode( "\n", $text );
+               $textLines = explode( "\n", $text );
 
-               $lastPref = $text = $lastLine = '';
+               $lastPrefix = $output = $lastLine = '';
                $this->mDTopen = $inBlockElem = false;
-               $npl = 0;
-               $pstack = false;
-
-               if ( ! $linestart ) { $text .= array_shift( $a ); }
-               foreach ( $a as $t ) {
-                       $oLine = $t;
-                       $opl = strlen( $lastPref );
-                       $preCloseMatch = preg_match("/<\\/pre/i", $t );
-                       $preOpenMatch = preg_match("/<pre/i", $t );
+               $prefixLength = 0;
+               $paragraphStack = false;
+
+               if ( !$linestart ) {
+                       $output .= array_shift( $textLines );
+               }
+               foreach ( $textLines as $oLine ) {
+                       $lastPrefixLength = strlen( $lastPrefix );
+                       $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
+                       $preOpenMatch = preg_match("/<pre/i", $oLine );
                        if (!$this->mInPre) {
                                $this->mInPre = !empty($preOpenMatch);
                        }
                        if ( !$this->mInPre ) {
-                               $npl = strspn( $t, "*#:;" );
-                               $pref = substr( $t, 0, $npl );
+                               # Multiple prefixes may abut each other for nested lists.
+                               $prefixLength = strspn( $oLine, "*#:;" );
+                               $pref = substr( $oLine, 0, $prefixLength );
+                               
+                               # eh?
                                $pref2 = str_replace( ";", ":", $pref );
-                               $t = substr( $t, $npl );
+                               $t = substr( $oLine, $prefixLength );
                        } else {
-                               $npl = 0;
+                               # Don't interpret any other prefixes in preformatted text
+                               $prefixLength = 0;
                                $pref = $pref2 = '';
+                               $t = $oLine;
                        }
 
-                       // list generation
-                       if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
-                               $text .= $this->nextItem( substr( $pref, -1 ) );
-                               if ( $pstack ) { $pstack = false; }
+                       # List generation
+                       if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+                               # Same as the last item, so no need to deal with nesting or opening stuff
+                               $output .= $this->nextItem( substr( $pref, -1 ) );
+                               $paragraphStack = false;
 
                                if ( ";" == substr( $pref, -1 ) ) {
-                                       $cpos = strpos( $t, ":" );
-                                       if ( false !== $cpos ) {
-                                               $term = substr( $t, 0, $cpos );
-                                               $text .= $term . $this->nextItem( ":" );
-                                               $t = substr( $t, $cpos + 1 );
+                                       # The one nasty exception: definition lists work like this:
+                                       # ; title : definition text
+                                       # So we check for : in the remainder text to split up the
+                                       # title and definition, without b0rking links.
+                                       # FIXME: This is not foolproof. Something better in Tokenizer might help.
+                                       if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+                                               $term = $match[1];
+                                               $output .= $term . $this->nextItem( ":" );
+                                               $t = $match[2];
                                        }
                                }
-                       } else if (0 != $npl || 0 != $opl) {
-                               $cpl = $this->getCommon( $pref, $lastPref );
-                               if ( $pstack ) { $pstack = false; }
-
-                               while ( $cpl < $opl ) {
-                                       $text .= $this->closeList( $lastPref{$opl-1} );
-                                       --$opl;
+                       } elseif( $prefixLength || $lastPrefixLength ) {
+                               # Either open or close a level...
+                               $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+                               $paragraphStack = false;
+
+                               while( $commonPrefixLength < $lastPrefixLength ) {
+                                       $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+                                       --$lastPrefixLength;
                                }
-                               if ( $npl <= $cpl && $cpl > 0 ) {
-                                       $text .= $this->nextItem( $pref{$cpl-1} );
+                               if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+                                       $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
                                }
-                               while ( $npl > $cpl ) {
-                                       $char = substr( $pref, $cpl, 1 );
-                                       $text .= $this->openList( $char );
+                               while ( $prefixLength > $commonPrefixLength ) {
+                                       $char = substr( $pref, $commonPrefixLength, 1 );
+                                       $output .= $this->openList( $char );
 
                                        if ( ";" == $char ) {
-                                               $cpos = strpos( $t, ":" );
-                                               if ( ! ( false === $cpos ) ) {
-                                                       $term = substr( $t, 0, $cpos );
-                                                       $text .= $term . $this->nextItem( ":" );
-                                                       $t = substr( $t, $cpos + 1 );
+                                               # FIXME: This is dupe of code above
+                                               if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
+                                                       $term = $match[1];
+                                                       $output .= $term . $this->nextItem( ":" );
+                                                       $t = $match[2];
                                                }
                                        }
-                                       ++$cpl;
+                                       ++$commonPrefixLength;
                                }
-                               $lastPref = $pref2;
+                               $lastPrefix = $pref2;
                        }
-                       if ( 0 == $npl ) { # No prefix (not in list)--go to paragraph mode
+                       if( 0 == $prefixLength ) {
+                               # No prefix (not in list)--go to paragraph mode
                                $uniq_prefix = UNIQ_PREFIX;
                                // XXX: use a stack for nestable elements like span, table and div
-                               $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<div|<pre|<tr|<td|<p|<ul|<li)/i", $t );
+                               $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
                                $closematch = preg_match(
                                        "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
-                                       "<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
+                                       "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
                                if ( $openmatch or $closematch ) {
-                                       if ( $pstack ) { $pstack = false; }
-                                       $text .= $this->closeParagraph();
+                                       $paragraphStack = false;
+                                       $output .= $this->closeParagraph();
                                        if($preOpenMatch and !$preCloseMatch) {
                                                $this->mInPre = true;   
                                        }
@@ -1106,57 +1264,57 @@ class Parser
                                                $inBlockElem = true;
                                        }
                                } else if ( !$inBlockElem ) {
-                                       if ( " " == $t{0} ) {
+                                       if ( " " == $t{0} and trim($t) != '' ) {
                                                // pre
                                                if ($this->mLastSection != 'pre') {
-                                                       $pstack = false;
-                                                       $text .= $this->closeParagraph().'<pre>';
+                                                       $paragraphStack = false;
+                                                       $output .= $this->closeParagraph().'<pre>';
                                                        $this->mLastSection = 'pre';
                                                }
                                        } else {
                                                // paragraph
                                                if ( '' == trim($t) ) {
-                                                       if ( $pstack ) {
-                                                               $text .= $pstack.'<br/>';
-                                                               $pstack = false;
+                                                       if ( $paragraphStack ) {
+                                                               $output .= $paragraphStack.'<br/>';
+                                                               $paragraphStack = false;
                                                                $this->mLastSection = 'p';
                                                        } else {
                                                                if ($this->mLastSection != 'p' ) {
-                                                                       $text .= $this->closeParagraph();
+                                                                       $output .= $this->closeParagraph();
                                                                        $this->mLastSection = '';
-                                                                       $pstack = "<p>";
+                                                                       $paragraphStack = "<p>";
                                                                } else {
-                                                                       $pstack = '</p><p>';
+                                                                       $paragraphStack = '</p><p>';
                                                                }
                                                        }
                                                } else {
-                                                       if ( $pstack ) {
-                                                               $text .= $pstack;
-                                                               $pstack = false;
+                                                       if ( $paragraphStack ) {
+                                                               $output .= $paragraphStack;
+                                                               $paragraphStack = false;
                                                                $this->mLastSection = 'p';
                                                        } else if ($this->mLastSection != 'p') {
-                                                               $text .= $this->closeParagraph().'<p>';
+                                                               $output .= $this->closeParagraph().'<p>';
                                                                $this->mLastSection = 'p';
                                                        }
                                                }
                                        }
                                }
                        }
-                       if ($pstack === false) {
-                               $text .= $t."\n";
+                       if ($paragraphStack === false) {
+                               $output .= $t."\n";
                        }
                }
-               while ( $npl ) {
-                       $text .= $this->closeList( $pref2{$npl-1} );
-                       --$npl;
+               while ( $prefixLength ) {
+                       $output .= $this->closeList( $pref2{$prefixLength-1} );
+                       --$prefixLength;
                }
                if ( "" != $this->mLastSection ) {
-                       $text .= "</" . $this->mLastSection . ">";
+                       $output .= "</" . $this->mLastSection . ">";
                        $this->mLastSection = "";
                }
 
                wfProfileOut( $fname );
-               return $text;
+               return $output;
        }
 
        function getVariableValue( $index ) {
@@ -1171,11 +1329,12 @@ class Parser
                                return $wgLang->getMonthNameGen( date("n") );
                        case MAG_CURRENTDAY:
                                return date("j");
-                       case MAG_CURRENTDAYNAME:
                        case MAG_PAGENAME:
                                return $this->mTitle->getText();
                        case MAG_NAMESPACE:
-                               return Namespace::getCanonicalName($this->mTitle->getNamespace());
+                               # return Namespace::getCanonicalName($this->mTitle->getNamespace());
+                               return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
+                       case MAG_CURRENTDAYNAME:
                                return $wgLang->getWeekdayName( date("w")+1 );
                        case MAG_CURRENTYEAR:
                                return date( "Y" );
@@ -1214,13 +1373,20 @@ class Parser
                        $this->initialiseVariables();
                }
                $titleChars = Title::legalChars();
-               $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
 
                # This function is called recursively. To keep track of arguments we need a stack:
                array_push( $this->mArgStack, $args );
 
                # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
                $GLOBALS['wgCurParser'] =& $this;
+
+               # Argument substitution
+               if ( $this->mOutputType == OT_HTML ) {
+                       $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
+               }
+
+               # Double brace substitution
+               $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
                $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );
 
                array_pop( $this->mArgStack );
@@ -1234,6 +1400,8 @@ class Parser
                $fname = "Parser::braceSubstitution";
                $found = false;
                $nowiki = false;
+               $noparse = false;
+               
                $title = NULL;
 
                # $newline is an optional newline character before the braces
@@ -1249,20 +1417,30 @@ class Parser
                        $args = array();
                }
                $argc = count( $args );
+       
+               # Triple braces ({{{arg}}}): return the matched text unchanged and skip re-parsing it
+               if ( strpos( $matches[0], "{{{" ) !== false ) {
+                       $text = $matches[0];
+                       $found = true;
+                       $noparse = true;
+               }
 
                # SUBST
-               $mwSubst =& MagicWord::get( MAG_SUBST );
-               if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
-                       if ( $this->mOutputType != OT_WIKI ) {
-                               # Invalid SUBST not replaced at PST time
-                               # Return without further processing
+               if ( !$found ) {
+                       $mwSubst =& MagicWord::get( MAG_SUBST );
+                       if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
+                               if ( $this->mOutputType != OT_WIKI ) {
+                                       # Invalid SUBST not replaced at PST time
+                                       # Return without further processing
+                                       $text = $matches[0];
+                                       $found = true;
+                                       $noparse= true;
+                               }
+                       } elseif ( $this->mOutputType == OT_WIKI ) {
+                               # SUBST not found in PST pass, do nothing
                                $text = $matches[0];
                                $found = true;
                        }
-               } elseif ( $this->mOutputType == OT_WIKI ) {
-                       # SUBST not found in PST pass, do nothing
-                       $text = $matches[0];
-                       $found = true;
                }
 
                # MSG, MSGNW and INT
@@ -1337,14 +1515,14 @@ class Parser
                        $found = true;
                        $this->mOutput->mContainsOldMagic = true;
                }
-
+/*
                # Arguments input from the caller
                $inputArgs = end( $this->mArgStack );
                if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
                        $text = $inputArgs[$part1];
                        $found = true;
                }
-
+*/
                # Load from database
                if ( !$found ) {
                        $title = Title::newFromText( $part1, NS_TEMPLATE );
@@ -1373,7 +1551,7 @@ class Parser
                # Only for HTML output
                if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
                        $text = wfEscapeWikiText( $text );
-               } elseif ( $this->mOutputType == OT_HTML && $found ) {
+               } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
                        # Clean up argument array
                        $assocArgs = array();
                        $index = 1;
@@ -1399,12 +1577,7 @@ class Parser
                        }
 
                        # Run full parser on the included text
-                       $text = $this->strip( $text, $this->mStripState );
-                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
-
-                       # Add the result to the strip state for re-inclusion after
-                       # the rest of the processing
-                       $text = $this->insertStripItem( $text, $this->mStripState );
+                       $text = $this->stripParse( $text, (bool)$newline, $assocArgs );
 
                        # Resume the link cache and register the inclusion as a link
                        if ( !is_null( $title ) ) {
@@ -1416,10 +1589,25 @@ class Parser
                if ( !$found ) {
                        return $matches[0];
                } else {
-                       return $newline . $text;
+                       return $text;
                }
        }
 
+       # Triple brace replacement -- used for template arguments
+       function argSubstitution( $matches )
+       {
+               $newline = $matches[1];
+               $arg = trim( $matches[2] );
+               $text = $matches[0];
+               $inputArgs = end( $this->mArgStack );
+
+               if ( array_key_exists( $arg, $inputArgs ) ) {
+                       $text = $this->stripParse( $inputArgs[$arg], (bool)$newline, array() );
+               }
+               
+               return $text;
+       }
+
        # Returns true if the function is allowed to include this entity
        function incrementIncludeCount( $dbk )
        {
@@ -1437,25 +1625,34 @@ class Parser
        # Cleans up HTML, removes dangerous tags and attributes
        /* private */ function removeHTMLtags( $text )
        {
+               global $wgUseTidy, $wgUserHtml;
                $fname = "Parser::removeHTMLtags";
                wfProfileIn( $fname );
-               $htmlpairs = array( # Tags that must be closed
-                       "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
-                       "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
-                       "strike", "strong", "tt", "var", "div", "center",
-                       "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
-                       "ruby", "rt" , "rb" , "rp", "p"
-               );
-               $htmlsingle = array(
-                       "br", "hr", "li", "dt", "dd"
-               );
-               $htmlnest = array( # Tags that can be nested--??
-                       "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
-                       "dl", "font", "big", "small", "sub", "sup"
-               );
-               $tabletags = array( # Can only appear inside table
-                       "td", "th", "tr"
-               );
+               
+               if( $wgUserHtml ) {
+                       $htmlpairs = array( # Tags that must be closed
+                               "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
+                               "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
+                               "strike", "strong", "tt", "var", "div", "center",
+                               "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
+                               "ruby", "rt" , "rb" , "rp", "p"
+                       );
+                       $htmlsingle = array(
+                               "br", "hr", "li", "dt", "dd"
+                       );
+                       $htmlnest = array( # Tags that can be nested--??
+                               "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
+                               "dl", "font", "big", "small", "sub", "sup"
+                       );
+                       $tabletags = array( # Can only appear inside table
+                               "td", "th", "tr"
+                       );
+               } else {
+                       $htmlpairs = array();
+                       $htmlsingle = array();
+                       $htmlnest = array();
+                       $tabletags = array();
+               }
 
                $htmlsingle = array_merge( $tabletags, $htmlsingle );
                $htmlelements = array_merge( $htmlsingle, $htmlpairs );
@@ -1463,70 +1660,86 @@ class Parser
                $htmlattrs = $this->getHTMLattrs () ;
 
                # Remove HTML comments
-               $text = preg_replace( "/<!--.*-->/sU", "", $text );
+               $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "$2", $text );
 
                $bits = explode( "<", $text );
                $text = array_shift( $bits );
-               $tagstack = array(); $tablestack = array();
-
-               foreach ( $bits as $x ) {
-                       $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
-                       preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
-                         $x, $regs );
-                       list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
-                       error_reporting( $prev );
-
-                       $badtag = 0 ;
-                       if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
-                               # Check our stack
-                               if ( $slash ) {
-                                       # Closing a tag...
-                                       if ( ! in_array( $t, $htmlsingle ) &&
-                                         ( $ot = array_pop( $tagstack ) ) != $t ) {
-                                               array_push( $tagstack, $ot );
-                                               $badtag = 1;
+               if(!$wgUseTidy) {
+                       $tagstack = array(); $tablestack = array();
+                       foreach ( $bits as $x ) {
+                               $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
+                               preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+                               $x, $regs );
+                               list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+                               error_reporting( $prev );
+
+                               $badtag = 0 ;
+                               if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+                                       # Check our stack
+                                       if ( $slash ) {
+                                               # Closing a tag...
+                                               if ( ! in_array( $t, $htmlsingle ) &&
+                                               ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) {
+                                                       if(!empty($ot)) array_push( $tagstack, $ot );
+                                                       $badtag = 1;
+                                               } else {
+                                                       if ( $t == "table" ) {
+                                                               $tagstack = array_pop( $tablestack );
+                                                       }
+                                                       $newparams = "";
+                                               }
                                        } else {
-                                               if ( $t == "table" ) {
-                                                       $tagstack = array_pop( $tablestack );
+                                               # Keep track for later
+                                               if ( in_array( $t, $tabletags ) &&
+                                               ! in_array( "table", $tagstack ) ) {
+                                                       $badtag = 1;
+                                               } else if ( in_array( $t, $tagstack ) &&
+                                               ! in_array ( $t , $htmlnest ) ) {
+                                                       $badtag = 1 ;
+                                               } else if ( ! in_array( $t, $htmlsingle ) ) {
+                                                       if ( $t == "table" ) {
+                                                               array_push( $tablestack, $tagstack );
+                                                               $tagstack = array();
+                                                       }
+                                                       array_push( $tagstack, $t );
                                                }
-                                               $newparams = "";
+                                               # Strip non-approved attributes from the tag
+                                               $newparams = $this->fixTagAttributes($params);
+
                                        }
-                               } else {
-                                       # Keep track for later
-                                       if ( in_array( $t, $tabletags ) &&
-                                         ! in_array( "table", $tagstack ) ) {
-                                               $badtag = 1;
-                                       } else if ( in_array( $t, $tagstack ) &&
-                                         ! in_array ( $t , $htmlnest ) ) {
-                                               $badtag = 1 ;
-                                       } else if ( ! in_array( $t, $htmlsingle ) ) {
-                                               if ( $t == "table" ) {
-                                                       array_push( $tablestack, $tagstack );
-                                                       $tagstack = array();
-                                               }
-                                               array_push( $tagstack, $t );
+                                       if ( ! $badtag ) {
+                                               $rest = str_replace( ">", "&gt;", $rest );
+                                               $text .= "<$slash$t $newparams$brace$rest";
+                                               continue;
                                        }
-                                       # Strip non-approved attributes from the tag
-                                       $newparams = $this->fixTagAttributes($params);
-
                                }
-                               if ( ! $badtag ) {
+                               $text .= "&lt;" . str_replace( ">", "&gt;", $x);
+                       }
+                       # Close off any remaining tags
+                       while ( $t = array_pop( $tagstack ) ) {
+                               $text .= "</$t>\n";
+                               if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+                       }
+               } else {
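+                       # With tidy enabled, skip the tag-stack checks and only whitelist elements and attributes; this filtering might also be possible using tidy itself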
+                       foreach ( $bits as $x ) {
+                               preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+                               $x, $regs );
+                               @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+                               if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+                                       $newparams = $this->fixTagAttributes($params);
                                        $rest = str_replace( ">", "&gt;", $rest );
                                        $text .= "<$slash$t $newparams$brace$rest";
-                                       continue;
+                               } else {
+                                       $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                                }
-                       }
-                       $text .= "&lt;" . str_replace( ">", "&gt;", $x);
-               }
-               # Close off any remaining tags
-               while ( $t = array_pop( $tagstack ) ) {
-                       $text .= "</$t>\n";
-                       if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+                       }       
                }
                wfProfileOut( $fname );
                return $text;
        }
 
+
 /*
  *
  * This function accomplishes several tasks:
@@ -1540,8 +1753,10 @@ class Parser
  *
  */
 
-       /* private */ function formatHeadings( $text )
+       /* private */ function formatHeadings( $text, $isMain=true )
        {
+               global $wgInputEncoding;
+               
                $doNumberHeadings = $this->mOptions->getNumberHeadings();
                $doShowToc = $this->mOptions->getShowToc();
                if( !$this->mTitle->userCanEdit() ) {
@@ -1642,7 +1857,9 @@ class Parser
                        # strip out HTML
                        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
                        $tocline = trim( $canonized_headline );
-                       $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+                       $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
+                       # replace urlencoded &nbsp; (%C2%A0, inserted for French spacing, e.g. the first space in 'something : something') with an underscore
+                       $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);
                        $refer[$headlineCount] = $canonized_headline;
 
                        # count how many in assoc. array so we can track dupes in anchors
@@ -1708,7 +1925,7 @@ class Parser
                                # $full .= $sk->editSectionLink(0);
                        }
                        $full .= $block;
-                       if( $doShowToc && !$i) {
+                       if( $doShowToc && !$i && $isMain) {
                        # Top anchor now in skin
                                $full = $full.$toc;
                        }
@@ -2052,4 +2269,10 @@ function wfBraceSubstitution( $matches )
        return $wgCurParser->braceSubstitution( $matches );
 }
 
+function wfArgSubstitution( $matches )
+{
+       global $wgCurParser;
+       return $wgCurParser->argSubstitution( $matches );
+}
+
 ?>