includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 if( $GLOBALS['wgUseWikiHiero'] ){
   6         require_once('extensions/wikihiero/wikihiero.php');
   7 }
   8 if( $GLOBALS['wgUseTimeline'] ){
   9         require_once('extensions/timeline/Timeline.php');
  10 }
  11
  12 # PHP Parser
  13 #
  14 # Processes wiki markup
  15 #
  16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  18 #
  19 # Globals used:
  20 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  21 #
  22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  23 #
  24 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  25 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  26 #               $wgLocaltimezone
  27 #
  28 #      * only within ParserOptions
  29 #
  30 #
  31 #----------------------------------------
  32 #    Variable substitution O(N^2) attack
  33 #-----------------------------------------
  34 # Without countermeasures, it would be possible to attack the parser by saving a page
  35 # filled with a large number of inclusions of large pages. The size of the generated
  36 # page would be proportional to the square of the input size. Hence, we limit the number
  37 # of inclusions of any given page, thus bringing any attack back to O(N).
  38 #
  39
  40 define( "MAX_INCLUDE_REPEAT", 5 );
  41
  42 # Allowed values for $mOutputType
  43 define( "OT_HTML", 1 );
  44 define( "OT_WIKI", 2 );
  45 define( "OT_MSG", 3 );
  46
  47 # string parameter for extractTags which will cause it
  48 # to strip HTML comments in addition to regular
  49 # <XML>-style tags. This should not be anything we
  50 # may want to use in wikisyntax
  51 define( "STRIP_COMMENTS", "HTMLCommentStrip" );
  52
  53 # prefix for escaping, used in two functions at least
  54 define( "UNIQ_PREFIX", "NaodW29");
  55
  56 class Parser
  57 {
  58         # Cleared with clearState():
  59         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  60         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  61
  62         # Temporary:
  63         var $mOptions, $mTitle, $mOutputType;
  64
  65         function Parser()
  66         {
  67                 $this->clearState();
  68         }
  69
  70         function clearState()
  71         {
  72                 $this->mOutput = new ParserOutput;
  73                 $this->mAutonumber = 0;
  74                 $this->mLastSection = "";
  75                 $this->mDTopen = false;
  76                 $this->mVariables = false;
  77                 $this->mIncludeCount = array();
  78                 $this->mStripState = array();
  79                 $this->mArgStack = array();
  80         }
  81
  82         # First pass--just handle <nowiki> sections, pass the rest off
  83         # to internalParse() which does all the real work.
  84         #
  85         # Returns a ParserOutput
  86         #
  87         function parse( $text, &$title, $options, $linestart = true, $clearState = true )
  88         {
  89                 global $wgUseTidy;
  90                 $fname = "Parser::parse";
  91                 wfProfileIn( $fname );
  92
  93                 if ( $clearState ) {
  94                         $this->clearState();
  95                 }
  96
  97                 $this->mOptions = $options;
  98                 $this->mTitle =& $title;
  99                 $this->mOutputType = OT_HTML;
 100
 101                 $stripState = NULL;
 102                 $text = $this->strip( $text, $this->mStripState );
 103                 $text = $this->internalParse( $text, $linestart );
 104                 $text = $this->unstrip( $text, $this->mStripState );
 105                 # Clean up special characters, only run once, next-to-last before doBlockLevels
 106                 if(!$wgUseTidy) {
 107                         $fixtags = array(
 108                                 # french spaces, last one Guillemet-left
 109                                 "/ (\\?|:|!|\\302\\273)/i"=>'&nbsp;\\1',
 110                                 # french spaces, Guillemet-right
 111                                 "/\\302\\253 /i"=>'\\302\\253&nbsp;',
 112                                 "/<hr *>/i" => '<hr/>',
 113                                 "/<br *>/i" => '<br/>',
 114                                 "/<center *>/i"=>'<div class="center">',
 115                                 "/<\\/center *>/i" => '</div>',
 116                                 # Clean up spare ampersands; note that we probably ought to be
 117                                 # more careful about named entities.
 118                                 '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
 119                         );
 120                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 121                 } else {
 122                         $fixtags = array(
 123                                 # french spaces, last one Guillemet-left
 124                                 "/ (\\?|:|!|\\302\\273)/i"=>'&nbsp;\\1',
 125                                 # french spaces, Guillemet-right
 126                                 "/\\302\\253 /i"=>'\\302\\253&nbsp;',
 127                                 "/<center *>/i"=>'<div class="center">',
 128                                 "/<\\/center *>/i" => '</div>'
 129                         );
 130                         $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
 131                 }
 132                 # only once and last
 133                 $text = $this->doBlockLevels( $text, $linestart );
 134                 if($wgUseTidy) {
 135                         $text = $this->tidy($text);
 136                 }
 137                 $this->mOutput->setText( $text );
 138                 wfProfileOut( $fname );
 139                 return $this->mOutput;
 140         }
 141
 142         /* static */ function getRandomString()
 143         {
 144                 return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
 145         }
 146
 147         # Replaces all occurrences of <$tag>content</$tag> in the text
 148         # with a random marker and returns the new text. the output parameter
 149         # $content will be an associative array filled with data on the form
 150         # $unique_marker => content.
 151
 152         # If $content is already set, the additional entries will be appended
 153
 154         # If $tag is set to STRIP_COMMENTS, the function will extract
 155         # <!-- HTML comments -->
 156
 157         /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
 158                 $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
 159                 if ( !$content ) {
 160                         $content = array( );
 161                 }
 162                 $n = 1;
 163                 $stripped = "";
 164
 165                 while ( "" != $text ) {
 166                         if($tag==STRIP_COMMENTS) {
 167                                 $p = preg_split( "/<!--/i", $text, 2 );
 168                         } else {
 169                                 $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 );
 170                         }
 171                         $stripped .= $p[0];
 172                         if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
 173                                 $text = "";
 174                         } else {
 175                                 if($tag==STRIP_COMMENTS) {
 176                                         $q = preg_split( "/-->/i", $p[1], 2 );
 177                                 } else {
 178                                         $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 );
 179                                 }
 180                                 $marker = $rnd . sprintf("%08X", $n++);
 181                                 $content[$marker] = $q[0];
 182                                 $stripped .= $marker;
 183                                 $text = $q[1];
 184                         }
 185                 }
 186                 return $stripped;
 187         }
 188
 189         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 190         # If $render is set, performs necessary rendering operations on plugins
 191         # Returns the text, and fills an array with data needed in unstrip()
 192         # If the $state is already a valid strip state, it adds to the state
 193
 194         # When $stripcomments is set, HTML comments <!-- like this -->
 195         # will be stripped in addition to other tags. This is important
 196         # for section editing, where these comments cause confusion when
 197         # counting the sections in the wikisource
 198         function strip( $text, &$state, $stripcomments = false )
 199         {
 200                 $render = ($this->mOutputType == OT_HTML);
 201                 $nowiki_content = array();
 202                 $hiero_content = array();
 203                 $timeline_content = array();
 204                 $math_content = array();
 205                 $pre_content = array();
 206                 $comment_content = array();
 207
 208                 # Replace any instances of the placeholders
 209                 $uniq_prefix = UNIQ_PREFIX;
 210                 #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 211
 212                 $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 213                 foreach( $nowiki_content as $marker => $content ){
 214                         if( $render ){
 215                                 $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
 216                         } else {
 217                                 $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
 218                         }
 219                 }
 220
 221                 $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
 222                 foreach( $hiero_content as $marker => $content ){
 223                         if( $render && $GLOBALS['wgUseWikiHiero']){
 224                                 $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
 225                         } else {
 226                                 $hiero_content[$marker] = "<hiero>$content</hiero>";
 227                         }
 228                 }
 229
 230                 $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
 231                 foreach( $timeline_content as $marker => $content ){
 232                         if( $render && $GLOBALS['wgUseTimeline']){
 233                                 $timeline_content[$marker] = renderTimeline( $content );
 234                         } else {
 235                                 $timeline_content[$marker] = "<timeline>$content</timeline>";
 236                         }
 237                 }
 238
 239                 $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
 240                 foreach( $math_content as $marker => $content ){
 241                         if( $render ) {
 242                                 if( $this->mOptions->getUseTeX() ) {
 243                                         $math_content[$marker] = renderMath( $content );
 244                                 } else {
 245                                         $math_content[$marker] = "&lt;math&gt;$content&lt;math&gt;";
 246                                 }
 247                         } else {
 248                                 $math_content[$marker] = "<math>$content</math>";
 249                         }
 250                 }
 251
 252                 $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
 253                 foreach( $pre_content as $marker => $content ){
 254                         if( $render ){
 255                                 $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
 256                         } else {
 257                                 $pre_content[$marker] = "<pre>$content</pre>";
 258                         }
 259                 }
 260                 if($stripcomments) {
 261                         $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
 262                         foreach( $comment_content as $marker => $content ){
 263                                 $comment_content[$marker] = "<!--$content-->";
 264                         }
 265                 }
 266
 267                 # Merge state with the pre-existing state, if there is one
 268                 if ( $state ) {
 269                         $state['nowiki'] = $state['nowiki'] + $nowiki_content;
 270                         $state['hiero'] = $state['hiero'] + $hiero_content;
 271                         $state['timeline'] = $state['timeline'] + $timeline_content;
 272                         $state['math'] = $state['math'] + $math_content;
 273                         $state['pre'] = $state['pre'] + $pre_content;
 274                         $state['comment'] = $state['comment'] + $comment_content;
 275                 } else {
 276                         $state = array(
 277                           'nowiki' => $nowiki_content,
 278                           'hiero' => $hiero_content,
 279                           'timeline' => $timeline_content,
 280                           'math' => $math_content,
 281                           'pre' => $pre_content,
 282                           'comment' => $comment_content
 283                         );
 284                 }
 285                 return $text;
 286         }
 287
 288         function unstrip( $text, &$state )
 289         {
 290                 # Must expand in reverse order, otherwise nested tags will be corrupted
 291                 $contentDict = end( $state );
 292                 for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) {
 293                         for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) {
 294                                 $text = str_replace( key( $contentDict ), $content, $text );
 295                         }
 296                 }
 297
 298                 return $text;
 299         }
 300
 301         # Add an item to the strip state
 302         # Returns the unique tag which must be inserted into the stripped text
 303         # The tag will be replaced with the original text in unstrip()
 304
 305         function insertStripItem( $text, &$state )
 306         {
 307                 $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
 308                 if ( !$state ) {
 309                         $state = array(
 310                           'nowiki' => array(),
 311                           'hiero' => array(),
 312                           'math' => array(),
 313                           'pre' => array()
 314                         );
 315                 }
 316                 $state['item'][$rnd] = $text;
 317                 return $rnd;
 318         }
 319
 320         # This method generates the list of subcategories and pages for a category
 321         function categoryMagic ()
 322         {
 323                 global $wgLang , $wgUser ;
 324                 if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all
 325
 326                 $cns = Namespace::getCategory() ;
 327                 if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page
 328
 329                 $r = "<br style=\"clear:both;\"/>\n";
 330
 331
 332                 $sk =& $wgUser->getSkin() ;
 333
 334                 $articles = array() ;
 335                 $children = array() ;
 336                 $data = array () ;
 337                 $id = $this->mTitle->getArticleID() ;
 338
 339                 # FIXME: add limits
 340                 $t = wfStrencode( $this->mTitle->getDBKey() );
 341                 $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
 342                 $res = wfQuery ( $sql, DB_READ ) ;
 343                 while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
 344
 345                 # For all pages that link to this category
 346                 foreach ( $data AS $x )
 347                 {
 348                         $t = $wgLang->getNsText ( $x->cur_namespace ) ;
 349                         if ( $t != "" ) $t .= ":" ;
 350                         $t .= $x->cur_title ;
 351
 352                         if ( $x->cur_namespace == $cns ) {
 353                                 array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
 354                         } else {
 355                                 array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
 356                         }
 357                 }
 358                 wfFreeResult ( $res ) ;
 359
 360                 # Showing subcategories
 361                 if ( count ( $children ) > 0 ) {
 362                         $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
 363                         $r .= implode ( ", " , $children ) ;
 364                 }
 365
 366                 # Showing pages in this category
 367                 if ( count ( $articles ) > 0 ) {
 368                         $ti = $this->mTitle->getText() ;
 369                         $h =  wfMsg( "category_header", $ti );
 370                         $r .= "<h2>{$h}</h2>\n" ;
 371                         $r .= implode ( ", " , $articles ) ;
 372                 }
 373
 374
 375                 return $r ;
 376         }
 377
 378         function getHTMLattrs ()
 379         {
 380                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 381                                 "title", "align", "lang", "dir", "width", "height",
 382                                 "bgcolor", "clear", /* BR */ "noshade", /* HR */
 383                                 "cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
 384                                 /* FONT */ "type", "start", "value", "compact",
 385                                 /* For various lists, mostly deprecated but safe */
 386                                 "summary", "width", "border", "frame", "rules",
 387                                 "cellspacing", "cellpadding", "valign", "char",
 388                                 "charoff", "colgroup", "col", "span", "abbr", "axis",
 389                                 "headers", "scope", "rowspan", "colspan", /* Tables */
 390                                 "id", "class", "name", "style" /* For CSS */
 391                                 );
 392                 return $htmlattrs ;
 393         }
 394
 395         function fixTagAttributes ( $t )
 396         {
 397                 if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
 398                 $htmlattrs = $this->getHTMLattrs() ;
 399
 400                 # Strip non-approved attributes from the tag
 401                 $t = preg_replace(
 402                         "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e",
 403                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 404                         $t);
 405                 # Strip javascript "expression" from stylesheets. Brute force approach:
 406                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 407
 408                 if( preg_match(
 409                         "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
 410                         wfMungeToUtf8( $t ) ) )
 411                 {
 412                         $t="";
 413                 }
 414
 415                 return trim ( $t ) ;
 416         }
 417
 418         /* interface with html tidy, used if $wgUseTidy = true */
 419         function tidy ( $text ) {
 420                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 421                 global $wgInputEncoding, $wgOutputEncoding;
 422                 $cleansource = '';
 423                 switch(strtoupper($wgOutputEncoding)) {
 424                         case 'ISO-8859-1':
 425                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 426                                 break;
 427                         case 'UTF-8':
 428                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 429                                 break;
 430                         default:
 431                                 $wgTidyOpts .= ' -raw';
 432                         }
 433
 434                 $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 435 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 436 '<head><title>test</title></head><body>'.$text.'</body></html>';
 437                 $descriptorspec = array(
 438                         0 => array("pipe", "r"),
 439                         1 => array("pipe", "w"),
 440                         2 => array("file", "/dev/null", "a")
 441                 );
 442                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 443                 if (is_resource($process)) {
 444                         fwrite($pipes[0], $text);
 445                         fclose($pipes[0]);
 446                         while (!feof($pipes[1])) {
 447                                 $cleansource .= fgets($pipes[1], 1024);
 448                         }
 449                         fclose($pipes[1]);
 450                         $return_value = proc_close($process);
 451                 }
 452                 if( $cleansource == '' && $text != '') {
 453                         return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
 454                 } else {
 455                         return $cleansource;
 456                 }
 457         }
 458
 459         function doTableStuff ( $t )
 460         {
 461                 $t = explode ( "\n" , $t ) ;
 462                 $td = array () ; # Is currently a td tag open?
 463                         $ltd = array () ; # Was it TD or TH?
 464                         $tr = array () ; # Is currently a tr tag open?
 465                         $ltr = array () ; # tr attributes
 466                         foreach ( $t AS $k => $x )
 467                         {
 468                                 $x = trim ( $x ) ;
 469                                 $fc = substr ( $x , 0 , 1 ) ;
 470                                 if ( "{|" == substr ( $x , 0 , 2 ) )
 471                                 {
 472                                         $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . ">" ;
 473                                         array_push ( $td , false ) ;
 474                                         array_push ( $ltd , "" ) ;
 475                                         array_push ( $tr , false ) ;
 476                                         array_push ( $ltr , "" ) ;
 477                                 }
 478                                 else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 479                                 else if ( "|}" == substr ( $x , 0 , 2 ) )
 480                                 {
 481                                         $z = "</table>\n" ;
 482                                         $l = array_pop ( $ltd ) ;
 483                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 484                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 485                                         array_pop ( $ltr ) ;
 486                                         $t[$k] = $z ;
 487                                 }
 488                                 /*      else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption
 489                                                 {
 490                                                 $z = trim ( substr ( $x , 2 ) ) ;
 491                                                 $t[$k] = "<caption>{$z}</caption>\n" ;
 492                                                 }*/
 493                                 else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 494                                 {
 495                                         $x = substr ( $x , 1 ) ;
 496                                         while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 497                                         $z = "" ;
 498                                         $l = array_pop ( $ltd ) ;
 499                                         if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
 500                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 501                                         array_pop ( $ltr ) ;
 502                                         $t[$k] = $z ;
 503                                         array_push ( $tr , false ) ;
 504                                         array_push ( $td , false ) ;
 505                                         array_push ( $ltd , "" ) ;
 506                                         array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 507                                 }
 508                                 else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
 509                                 {
 510                                         if ( "|+" == substr ( $x , 0 , 2 ) )
 511                                         {
 512                                                 $fc = "+" ;
 513                                                 $x = substr ( $x , 1 ) ;
 514                                         }
 515                                         $after = substr ( $x , 1 ) ;
 516                                         if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
 517                                         $after = explode ( "||" , $after ) ;
 518                                         $t[$k] = "" ;
 519                                         foreach ( $after AS $theline )
 520                                         {
 521                                                 $z = "" ;
 522                                                 if ( $fc != "+" )
 523                                                 {
 524                                                         $tra = array_pop ( $ltr ) ;
 525                                                         if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 526                                                         array_push ( $tr , true ) ;
 527                                                         array_push ( $ltr , "" ) ;
 528                                                 }
 529
 530                                                 $l = array_pop ( $ltd ) ;
 531                                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 532                                                 if ( $fc == "|" ) $l = "td" ;
 533                                                 else if ( $fc == "!" ) $l = "th" ;
 534                                                 else if ( $fc == "+" ) $l = "caption" ;
 535                                                 else $l = "" ;
 536                                                 array_push ( $ltd , $l ) ;
 537                                                 $y = explode ( "|" , $theline , 2 ) ;
 538                                                 if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 539                                                 else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 540                                                 $t[$k] .= $y ;
 541                                                 array_push ( $td , true ) ;
 542                                         }
 543                                 }
 544                         }
 545
 546                 # Closing open td, tr && table
 547                 while ( count ( $td ) > 0 )
 548                 {
 549                         if ( array_pop ( $td ) ) $t[] = "</td>" ;
 550                         if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
 551                         $t[] = "</table>" ;
 552                 }
 553
 554                 $t = implode ( "\n" , $t ) ;
 555                 #               $t = $this->removeHTMLtags( $t );
 556                 return $t ;
 557         }
 558
 559         # Parses the text and adds the result to the strip state
 560         # Returns the strip tag
 561         function stripParse( $text, $linestart, $args )
 562         {
 563                 $text = $this->strip( $text, $this->mStripState );
 564                 $text = $this->internalParse( $text, $linestart, $args, false );
 565                 if( $linestart ) {
 566                         $text = "\n" . $text;
 567                 }
 568                 return $this->insertStripItem( $text, $this->mStripState );
 569         }
 570
 571         function internalParse( $text, $linestart, $args = array(), $isMain=true )
 572         {
 573                 $fname = "Parser::internalParse";
 574                 wfProfileIn( $fname );
 575
 576                 $text = $this->removeHTMLtags( $text );
 577                 $text = $this->replaceVariables( $text, $args );
 578
 579                 $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );
 580
 581                 $text = $this->doHeadings( $text );
 582                 if($this->mOptions->getUseDynamicDates()) {
 583                         global $wgDateFormatter;
 584                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 585                 }
 586                 $text = $this->doAllQuotes( $text );
 587                 $text = $this->replaceExternalLinks( $text );
 588                 $text = $this->replaceInternalLinks ( $text );
 589                 //$text = $this->doTokenizedParser ( $text );
 590                 $text = $this->doTableStuff ( $text ) ;
 591                 $text = $this->magicISBN( $text );
 592                 $text = $this->magicRFC( $text );
 593                 $text = $this->formatHeadings( $text, $isMain );
 594                 $sk =& $this->mOptions->getSkin();
 595                 $text = $sk->transformContent( $text );
 596
 597                 if ( !isset ( $this->categoryMagicDone ) ) {
 598                         $text .= $this->categoryMagic () ;
 599                         $this->categoryMagicDone = true ;
 600                 }
 601
 602                 wfProfileOut( $fname );
 603                 return $text;
 604         }
 605
 606
 607         /* private */ function doHeadings( $text )
 608         {
 609                 for ( $i = 6; $i >= 1; --$i ) {
 610                         $h = substr( "======", 0, $i );
 611                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 612                           "<h{$i}>\\1</h{$i}>\\2", $text );
 613                 }
 614                 return $text;
 615         }
 616
 617         /* private */ function doAllQuotes( $text )
 618         {
 619                 $outtext = "";
 620                 $lines = explode( "\r\n", $text );
 621                 foreach ( $lines as $line ) {
 622                         $outtext .= $this->doQuotes ( "", $line, "" ) . "\r\n";
 623                 }
 624                 return $outtext;
 625         }
 626
 627         /* private */ function doQuotes( $pre, $text, $mode )
 628         {
 629                 if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
 630                         $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
 631                         $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";
 632                         if ( substr ($m[2], 0, 1) == "'" ) {
 633                                 $m[2] = substr ($m[2], 1);
 634                                 if ($mode == "em") {
 635                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
 636                                 } else if ($mode == "strong") {
 637                                         return $m1_strong . $this->doQuotes ( "", $m[2], "" );
 638                                 } else if (($mode == "emstrong") || ($mode == "both")) {
 639                                         return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
 640                                 } else if ($mode == "strongem") {
 641                                         return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
 642                                 } else {
 643                                         return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
 644                                 }
 645                         } else {
 646                                 if ($mode == "strong") {
 647                                         return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
 648                                 } else if ($mode == "em") {
 649                                         return $m1_em . $this->doQuotes ( "", $m[2], "" );
 650                                 } else if ($mode == "emstrong") {
 651                                         return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
 652                                 } else if (($mode == "strongem") || ($mode == "both")) {
 653                                         return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
 654                                 } else {
 655                                         return $m[1] . $this->doQuotes ( "", $m[2], "em" );
 656                                 }
 657                         }
 658                 } else {
 659                         $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
 660                         $text_em = ($text == "") ? "" : "<em>{$text}</em>";
 661                         if ($mode == "") {
 662                                 return $pre . $text;
 663                         } else if ($mode == "em") {
 664                                 return $pre . $text_em;
 665                         } else if ($mode == "strong") {
 666                                 return $pre . $text_strong;
 667                         } else if ($mode == "strongem") {
 668                                 return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
 669                         } else {
 670                                 return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
 671                         }
 672                 }
 673         }
 674
 675         # Note: we have to do external links before the internal ones,
 676         # and otherwise take great care in the order of things here, so
 677         # that we don't end up interpreting some URLs twice.
 678
 679         /* private */ function replaceExternalLinks( $text )
 680         {
 681                 $fname = "Parser::replaceExternalLinks";
 682                 wfProfileIn( $fname );
 683                 $text = $this->subReplaceExternalLinks( $text, "http", true );
 684                 $text = $this->subReplaceExternalLinks( $text, "https", true );
 685                 $text = $this->subReplaceExternalLinks( $text, "ftp", false );
 686                 $text = $this->subReplaceExternalLinks( $text, "irc", false );
 687                 $text = $this->subReplaceExternalLinks( $text, "gopher", false );
 688                 $text = $this->subReplaceExternalLinks( $text, "news", false );
 689                 $text = $this->subReplaceExternalLinks( $text, "mailto", false );
 690                 wfProfileOut( $fname );
 691                 return $text;
 692         }
 693
 694         /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
 695         {
 696                 $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
 697                 $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";
 698
 699                 # this is  the list of separators that should be ignored if they
 700                 # are the last character of an URL but that should be included
 701                 # if they occur within the URL, e.g. "go to www.foo.com, where .."
 702                 # in this case, the last comma should not become part of the URL,
 703                 # but in "www.foo.com/123,2342,32.htm" it should.
 704                 $sep = ",;\.:";
 705                 $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
 706                 $images = "gif|png|jpg|jpeg";
 707
 708                 # PLEASE NOTE: The curly braces { } are not part of the regex,
 709                 # they are interpreted as part of the string (used to tell PHP
 710                 # that the content of the string should be inserted there).
 711                 $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
 712                   "((?i){$images})([^{$uc}]|$)/";
 713
 714                 $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
 715                 $sk =& $this->mOptions->getSkin();
 716
 717                 if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
 718                         $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" .
 719                           "/\\4.\\5", "\\4.\\5" ) . "\\6", $s );
 720                 }
 721                 $s = preg_replace( $e2, "\\1" . "<a href=\"{$unique}:\\3\"" .
 722                   $sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
 723                   "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
 724                   "</a>\\5", $s );
 725                 $s = str_replace( $unique, $protocol, $s );
 726
 727                 $a = explode( "[{$protocol}:", " " . $s );
 728                 $s = array_shift( $a );
 729                 $s = substr( $s, 1 );
 730
 731                 $e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
 732                 $e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";
 733
 734                 foreach ( $a as $line ) {
 735                         if ( preg_match( $e1, $line, $m ) ) {
 736                                 $link = "{$protocol}:{$m[1]}";
 737                                 $trail = $m[2];
 738                                 if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
 739                                 else { $text = wfEscapeHTML( $link ); }
 740                         } else if ( preg_match( $e2, $line, $m ) ) {
 741                                 $link = "{$protocol}:{$m[1]}";
 742                                 $text = $m[2];
 743                                 $trail = $m[3];
 744                         } else {
 745                                 $s .= "[{$protocol}:" . $line;
 746                                 continue;
 747                         }
 748                         if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) {
 749                                 $paren = "";
 750                         } else {
 751                                 # Expand the URL for printable version
 752                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
 753                         }
 754                         $la = $sk->getExternalLinkAttributes( $link, $text );
 755                         $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
 756
 757                 }
 758                 return $s;
 759         }
 760
 761
 762         /* private */ function replaceInternalLinks( $s )
 763         {
 764                 global $wgLang, $wgLinkCache;
 765                 global $wgNamespacesWithSubpages, $wgLanguageCode;
 766                 static $fname = "Parser::replaceInternalLink" ;
 767                 wfProfileIn( $fname );
 768
 769                 wfProfileIn( "$fname-setup" );
 770                 static $tc = FALSE;
 771                 # the % is needed to support urlencoded titles as well
 772                 if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
 773                 $sk =& $this->mOptions->getSkin();
 774
 775                 $a = explode( "[[", " " . $s );
 776                 $s = array_shift( $a );
 777                 $s = substr( $s, 1 );
 778
 779                 # Match a link having the form [[namespace:link|alternate]]trail
 780                 static $e1 = FALSE;
 781                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
 782                 # Match the end of a line for a word that's not followed by whitespace,
 783                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
 784                 #$e2 = "/^(.*)\\b(\\w+)\$/suD";
 785                 #$e2 = "/^(.*\\s)(\\S+)\$/suD";
 786                 static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';
 787
 788
 789                 # Special and Media are pseudo-namespaces; no pages actually exist in them
 790                 static $image = FALSE;
 791                 static $special = FALSE;
 792                 static $media = FALSE;
 793                 static $category = FALSE;
 794                 if ( !$image ) { $image = Namespace::getImage(); }
 795                 if ( !$special ) { $special = Namespace::getSpecial(); }
 796                 if ( !$media ) { $media = Namespace::getMedia(); }
 797                 if ( !$category ) { $category = Namespace::getCategory(); }
 798
 799                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
 800
 801                 if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $s, $m ) ) {
 802                         $new_prefix = $m[2];
 803                         $s = $m[1];
 804                 } else {
 805                         $new_prefix="";
 806                 }
 807
 808                 wfProfileOut( "$fname-setup" );
 809
 810                 foreach ( $a as $line ) {
 811                         $prefix = $new_prefix;
 812
 813                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
 814                                 $text = $m[2];
 815                                 # fix up urlencoded title texts
 816                                 if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
 817                                 $trail = $m[3];
 818                         } else { # Invalid form; output directly
 819                                 $s .= $prefix . "[[" . $line ;
 820                                 wfProfileOut( $fname );
 821                                 continue;
 822                         }
 823
 824                         /* Valid link forms:
 825                         Foobar -- normal
 826                         :Foobar -- override special treatment of prefix (images, language links)
 827                         /Foobar -- convert to CurrentPage/Foobar
 828                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
 829                         */
 830                         $c = substr($m[1],0,1);
 831                         $noforce = ($c != ":");
 832                         if( $c == "/" ) { # subpage
 833                                 if(substr($m[1],-1,1)=="/") {                 # / at end means we don't want the slash to be shown
 834                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
 835                                         $noslash=$m[1];
 836                                 } else {
 837                                         $noslash=substr($m[1],1);
 838                                 }
 839                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
 840                                         $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
 841                                         if( "" == $text ) {
 842                                                 $text= $m[1];
 843                                         } # this might be changed for ugliness reasons
 844                                 } else {
 845                                         $link = $noslash; # no subpage allowed, use standard link
 846                                 }
 847                         } elseif( $noforce ) { # no subpage
 848                                 $link = $m[1];
 849                         } else {
 850                                 $link = substr( $m[1], 1 );
 851                         }
 852                         $wasblank = ( "" == $text );
 853                         if( $wasblank )
 854                         $text = $link;
 855
 856                         $nt = Title::newFromText( $link );
 857                         if( !$nt ) {
 858                                 $s .= $prefix . "[[" . $line;
 859                                 wfProfileOut( $fname );
 860                                 continue;
 861                         }
 862                         $ns = $nt->getNamespace();
 863                         $iw = $nt->getInterWiki();
 864                         if( $noforce ) {
 865                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
 866                                         array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
 867                                         $s .= $prefix . $trail ;
 868                                         wfProfileOut( $fname );
 869                                         return (trim($s) == '')? '': $s;
 870                                         continue;
 871                                 }
 872                                 if ( $ns == $image ) {
 873                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
 874                                         $wgLinkCache->addImageLinkObj( $nt );
 875                                         wfProfileOut( $fname );
 876                                         continue;
 877                                 } else if ( $ns == $category ) {
 878                                         $t = $nt->getText() ;
 879                                         $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;
 880
 881                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
 882                                         $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
 883                                         $wgLinkCache->resume();
 884
 885                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
 886                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
 887                                         $this->mOutput->mCategoryLinks[] = $t ;
 888                                         $s .= $prefix . $trail ;
 889                                         wfProfileOut( $fname );
 890                                         continue;
 891                                 }
 892                         }
 893                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
 894                         ( strpos( $link, "#" ) == FALSE ) ) {
 895                                 # Self-links are handled specially; generally de-link and change to bold.
 896                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
 897                                 wfProfileOut( $fname );
 898                                 continue;
 899                         }
 900
 901                         if( $ns == $media ) {
 902                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
 903                                 $wgLinkCache->addImageLinkObj( $nt );
 904                                 wfProfileOut( $fname );
 905                                 continue;
 906                         } elseif( $ns == $special ) {
 907                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
 908                                 wfProfileOut( $fname );
 909                                 continue;
 910                         }
 911                         $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
 912                 }
 913                 wfProfileOut( $fname );
 914                 return $s;
 915         }
 916
 917         # Some functions here used by doBlockLevels()
 918         #
 919         /* private */ function closeParagraph()
 920         {
 921                 $result = "";
 922                 if ( '' != $this->mLastSection ) {
 923                         $result = "</" . $this->mLastSection  . ">\n";
 924                 }
 925                 $this->mInPre = false;
 926                 $this->mLastSection = "";
 927                 return $result;
 928         }
 929         # getCommon() returns the length of the longest common substring
 930         # of both arguments, starting at the beginning of both.
 931         #
 932         /* private */ function getCommon( $st1, $st2 )
 933         {
 934                 $fl = strlen( $st1 );
 935                 $shorter = strlen( $st2 );
 936                 if ( $fl < $shorter ) { $shorter = $fl; }
 937
 938                 for ( $i = 0; $i < $shorter; ++$i ) {
 939                         if ( $st1{$i} != $st2{$i} ) { break; }
 940                 }
 941                 return $i;
 942         }
 943         # These next three functions open, continue, and close the list
 944         # element appropriate to the prefix character passed into them.
 945         #
 946         /* private */ function openList( $char )
 947     {
 948                 $result = $this->closeParagraph();
 949
 950                 if ( "*" == $char ) { $result .= "<ul><li>"; }
 951                 else if ( "#" == $char ) { $result .= "<ol><li>"; }
 952                 else if ( ":" == $char ) { $result .= "<dl><dd>"; }
 953                 else if ( ";" == $char ) {
 954                         $result .= "<dl><dt>";
 955                         $this->mDTopen = true;
 956                 }
 957                 else { $result = "<!-- ERR 1 -->"; }
 958
 959                 return $result;
 960         }
 961
 962         /* private */ function nextItem( $char )
 963         {
 964                 if ( "*" == $char || "#" == $char ) { return "</li><li>"; }
 965                 else if ( ":" == $char || ";" == $char ) {
 966                         $close = "</dd>";
 967                         if ( $this->mDTopen ) { $close = "</dt>"; }
 968                         if ( ";" == $char ) {
 969                                 $this->mDTopen = true;
 970                                 return $close . "<dt>";
 971                         } else {
 972                                 $this->mDTopen = false;
 973                                 return $close . "<dd>";
 974                         }
 975                 }
 976                 return "<!-- ERR 2 -->";
 977         }
 978
 979         /* private */function closeList( $char )
 980         {
 981                 if ( "*" == $char ) { $text = "</li></ul>"; }
 982                 else if ( "#" == $char ) { $text = "</li></ol>"; }
 983                 else if ( ":" == $char ) {
 984                         if ( $this->mDTopen ) {
 985                                 $this->mDTopen = false;
 986                                 $text = "</dt></dl>";
 987                         } else {
 988                                 $text = "</dd></dl>";
 989                         }
 990                 }
 991                 else {  return "<!-- ERR 3 -->"; }
 992                 return $text."\n";
 993         }
 994
 995         /* private */ function doBlockLevels( $text, $linestart ) {
 996                 $fname = "Parser::doBlockLevels";
 997                 wfProfileIn( $fname );
 998
 999                 # Parsing through the text line by line.  The main thing
1000                 # happening here is handling of block-level elements p, pre,
1001                 # and making lists from lines starting with * # : etc.
1002                 #
1003                 $textLines = explode( "\n", $text );
1004
1005                 $lastPrefix = $output = $lastLine = '';
1006                 $this->mDTopen = $inBlockElem = false;
1007                 $prefixLength = 0;
1008                 $paragraphStack = false;
1009
1010                 if ( !$linestart ) {
1011                         $output .= array_shift( $textLines );
1012                 }
1013                 foreach ( $textLines as $oLine ) {
1014                         $lastPrefixLength = strlen( $lastPrefix );
1015                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1016                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1017                         if (!$this->mInPre) {
1018                                 $this->mInPre = !empty($preOpenMatch);
1019                         }
1020                         if ( !$this->mInPre ) {
1021                                 # Multiple prefixes may abut each other for nested lists.
1022                                 $prefixLength = strspn( $oLine, "*#:;" );
1023                                 $pref = substr( $oLine, 0, $prefixLength );
1024
1025                                 # eh?
1026                                 $pref2 = str_replace( ";", ":", $pref );
1027                                 $t = substr( $oLine, $prefixLength );
1028                         } else {
1029                                 # Don't interpret any other prefixes in preformatted text
1030                                 $prefixLength = 0;
1031                                 $pref = $pref2 = '';
1032                                 $t = $oLine;
1033                         }
1034
1035                         # List generation
1036                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1037                                 # Same as the last item, so no need to deal with nesting or opening stuff
1038                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1039                                 $paragraphStack = false;
1040
1041                                 if ( ";" == substr( $pref, -1 ) ) {
1042                                         # The one nasty exception: definition lists work like this:
1043                                         # ; title : definition text
1044                                         # So we check for : in the remainder text to split up the
1045                                         # title and definition, without b0rking links.
1046                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1047                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1048                                                 $term = $match[1];
1049                                                 $output .= $term . $this->nextItem( ":" );
1050                                                 $t = $match[2];
1051                                         }
1052                                 }
1053                         } elseif( $prefixLength || $lastPrefixLength ) {
1054                                 # Either open or close a level...
1055                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1056                                 $paragraphStack = false;
1057
1058                                 while( $commonPrefixLength < $lastPrefixLength ) {
1059                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1060                                         --$lastPrefixLength;
1061                                 }
1062                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1063                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1064                                 }
1065                                 while ( $prefixLength > $commonPrefixLength ) {
1066                                         $char = substr( $pref, $commonPrefixLength, 1 );
1067                                         $output .= $this->openList( $char );
1068
1069                                         if ( ";" == $char ) {
1070                                                 # FIXME: This is dupe of code above
1071                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1072                                                         $term = $match[1];
1073                                                         $output .= $term . $this->nextItem( ":" );
1074                                                         $t = $match[2];
1075                                                 }
1076                                         }
1077                                         ++$commonPrefixLength;
1078                                 }
1079                                 $lastPrefix = $pref2;
1080                         }
1081                         if( 0 == $prefixLength ) {
1082                                 # No prefix (not in list)--go to paragraph mode
1083                                 $uniq_prefix = UNIQ_PREFIX;
1084                                 // XXX: use a stack for nestable elements like span, table and div
1085                                 $openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
1086                                 $closematch = preg_match(
1087                                         "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
1088                                         "<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
1089                                 if ( $openmatch or $closematch ) {
1090                                         $paragraphStack = false;
1091                                         $output .= $this->closeParagraph();
1092                                         if($preOpenMatch and !$preCloseMatch) {
1093                                                 $this->mInPre = true;
1094                                         }
1095                                         if ( $closematch  ) {
1096                                                 $inBlockElem = false;
1097                                         } else {
1098                                                 $inBlockElem = true;
1099                                         }
1100                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1101                                         if ( " " == $t{0} and trim($t) != '' ) {
1102                                                 // pre
1103                                                 if ($this->mLastSection != 'pre') {
1104                                                         $paragraphStack = false;
1105                                                         $output .= $this->closeParagraph().'<pre>';
1106                                                         $this->mLastSection = 'pre';
1107                                                 }
1108                                         } else {
1109                                                 // paragraph
1110                                                 if ( '' == trim($t) ) {
1111                                                         if ( $paragraphStack ) {
1112                                                                 $output .= $paragraphStack.'<br/>';
1113                                                                 $paragraphStack = false;
1114                                                                 $this->mLastSection = 'p';
1115                                                         } else {
1116                                                                 if ($this->mLastSection != 'p' ) {
1117                                                                         $output .= $this->closeParagraph();
1118                                                                         $this->mLastSection = '';
1119                                                                         $paragraphStack = "<p>";
1120                                                                 } else {
1121                                                                         $paragraphStack = '</p><p>';
1122                                                                 }
1123                                                         }
1124                                                 } else {
1125                                                         if ( $paragraphStack ) {
1126                                                                 $output .= $paragraphStack;
1127                                                                 $paragraphStack = false;
1128                                                                 $this->mLastSection = 'p';
1129                                                         } else if ($this->mLastSection != 'p') {
1130                                                                 $output .= $this->closeParagraph().'<p>';
1131                                                                 $this->mLastSection = 'p';
1132                                                         }
1133                                                 }
1134                                         }
1135                                 }
1136                         }
1137                         if ($paragraphStack === false) {
1138                                 $output .= $t."\n";
1139                         }
1140                 }
1141                 while ( $prefixLength ) {
1142                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1143                         --$prefixLength;
1144                 }
1145                 if ( "" != $this->mLastSection ) {
1146                         $output .= "</" . $this->mLastSection . ">";
1147                         $this->mLastSection = "";
1148                 }
1149
1150                 wfProfileOut( $fname );
1151                 return $output;
1152         }
1153
# Work out the current value of one built-in magic variable.
#
# $index is compared against the MAG_* identifiers listed below. The value
# for the matching identifier is returned; an identifier that is not listed
# gives NULL.
function getVariableValue( $index ) {
    global $wgLang, $wgSitename, $wgServer;

    $value = NULL;

    switch ( $index ) {
        case MAG_CURRENTMONTH:
            # Month number with a leading zero
            $value = date( "m" );
            break;
        case MAG_CURRENTMONTHNAME:
            $value = $wgLang->getMonthName( date("n") );
            break;
        case MAG_CURRENTMONTHNAMEGEN:
            $value = $wgLang->getMonthNameGen( date("n") );
            break;
        case MAG_CURRENTDAY:
            # Day of the month without a leading zero
            $value = date("j");
            break;
        case MAG_PAGENAME:
            $value = $this->mTitle->getText();
            break;
        case MAG_NAMESPACE:
            # Taken from the language object rather than from
            # Namespace::getCanonicalName()
            $value = $wgLang->getNsText( $this->mTitle->getNamespace() );
            break;
        case MAG_CURRENTDAYNAME:
            # date("w") counts from 0 (Sunday); the lookup receives that value plus one
            $value = $wgLang->getWeekdayName( date("w")+1 );
            break;
        case MAG_CURRENTYEAR:
            $value = date( "Y" );
            break;
        case MAG_CURRENTTIME:
            $value = $wgLang->time( wfTimestampNow(), false );
            break;
        case MAG_NUMBEROFARTICLES:
            $value = wfNumberOfArticles();
            break;
        case MAG_SITENAME:
            $value = $wgSitename;
            break;
        case MAG_SERVER:
            $value = $wgServer;
            break;
    }

    return $value;
}
1187
# Rebuild $this->mVariables from scratch.
#
# For every identifier in $wgVariableIDs the matching MagicWord object is
# fetched and asked to add the identifier's current value (as computed by
# getVariableValue()) to the table.
function initialiseVariables()
{
    global $wgVariableIDs;

    $this->mVariables = array();

    foreach ( $wgVariableIDs as $variableId ) {
        $magicWord =& MagicWord::get( $variableId );
        $currentValue = $this->getVariableValue( $variableId );
        $magicWord->addToArray( $this->mVariables, $currentValue );
    }
}
1197
# Replace triple-brace and double-brace constructs in $text.
#
# $text  text to process
# $args  argument array for this level of expansion. It is pushed on
#        $this->mArgStack for the duration of the call; argSubstitution()
#        reads the top of that stack.
#
# Triple-brace constructs are only replaced when the output type is OT_HTML.
# Double-brace constructs are always handed to the "wfBraceSubstitution"
# callback. Returns the processed text.
/* private */ function replaceVariables( $text, $args = array() )
{
    $fname = "Parser::replaceVariables";
    wfProfileIn( $fname );

    # Build the variable table on first use
    if ( !$this->mVariables ) {
        $this->initialiseVariables();
    }
    $titleChars = Title::legalChars();

    # This function is called recursively. To keep track of arguments we need a stack:
    array_push( $this->mArgStack, $args );

    # Publish this parser as the global $wgCurParser. A plain "global"
    # statement cannot rebind by reference, hence the $GLOBALS array.
    # NOTE(review): presumably the callback functions named below read
    # $wgCurParser to reach this object -- they are not visible here, confirm.
    $GLOBALS['wgCurParser'] =& $this;

    # Argument substitution
    if ( $this->mOutputType == OT_HTML ) {
        $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
    }

    # Double brace substitution
    $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s";
    $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

    # Leave this level of expansion
    array_pop( $this->mArgStack );

    wfProfileOut( $fname );
    return $text;
}
1231
# Decide what a single double-brace construct is replaced with.
#
# $matches is a match array for the double-brace pattern built in
# replaceVariables():
#   [0]  the complete matched text
#   [1]  an optional newline character found before the braces
#   [2]  the bit before the first |, containing only title characters
#   [3]  "" or the rest of the construct, starting with |
#
# The handlers below are tried in order until one sets $found. Returns the
# replacement text, or $matches[0] unchanged when no handler applied.
function braceSubstitution( $matches )
{
    global $wgLinkCache, $wgLang;

    $found = false;    # a replacement text has been chosen
    $nowiki = false;   # set by MSGNW: the text is escaped instead of parsed
    $noparse = false;  # the text is returned without recursive parsing

    $title = NULL;

    $newline = $matches[1];
    $part1 = $matches[2];
    # $args is a list of arguments, starting from index 0, not including $part1.
    # If the third subpattern matched anything, it will start with |
    if ( $matches[3] !== "" ) {
        $args = explode( "|", substr( $matches[3], 1 ) );
    } else {
        $args = array();
    }
    $argc = count( $args );

    # {{{}}}
    # A match containing a triple brace is passed through untouched
    if ( strpos( $matches[0], "{{{" ) !== false ) {
        $text = $matches[0];
        $found = true;
        $noparse = true;
    }

    # SUBST
    if ( !$found ) {
        $mwSubst =& MagicWord::get( MAG_SUBST );
        if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
            if ( $this->mOutputType != OT_WIKI ) {
                # Invalid SUBST not replaced at PST time
                # Return without further processing
                $text = $matches[0];
                $found = true;
                $noparse = true;
            }
        } elseif ( $this->mOutputType == OT_WIKI ) {
            # SUBST not found in PST pass, do nothing
            $text = $matches[0];
            $found = true;
        }
    }

    # MSG, MSGNW and INT
    if ( !$found ) {
        # Check for MSGNW:
        $mwMsgnw =& MagicWord::get( MAG_MSGNW );
        if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
            $nowiki = true;
        } else {
            # Remove obsolete MSG:
            $mwMsg =& MagicWord::get( MAG_MSG );
            $mwMsg->matchStartAndRemove( $part1 );
        }

        # Check if it is an internal message
        $mwInt =& MagicWord::get( MAG_INT );
        if ( $mwInt->matchStartAndRemove( $part1 ) ) {
            # Internal messages count against the inclusion limit as well
            if ( $this->incrementIncludeCount( "int:$part1" ) ) {
                $text = wfMsgReal( $part1, $args, true );
                $found = true;
            }
        }
    }

    # NS
    if ( !$found ) {
        # Check for NS: (namespace expansion)
        $mwNs =& MagicWord::get( MAG_NS );
        if ( $mwNs->matchStartAndRemove( $part1 ) ) {
            if ( intval( $part1 ) ) {
                # Non-zero number: use it directly as the namespace index
                $text = $wgLang->getNsText( intval( $part1 ) );
                $found = true;
            } else {
                # Otherwise look the lower-cased text up as a canonical name
                $nsIndex = Namespace::getCanonicalIndex( strtolower( $part1 ) );
                if ( !is_null( $nsIndex ) ) {
                    $text = $wgLang->getNsText( $nsIndex );
                    $found = true;
                }
            }
        }
    }

    # LOCALURL and LOCALURLE
    if ( !$found ) {
        $mwLocal =& MagicWord::get( MAG_LOCALURL );
        $mwLocalE =& MagicWord::get( MAG_LOCALURLE );

        # $func is the name of the Title method to call, or '' for no match
        if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
            $func = 'getLocalURL';
        } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
            $func = 'escapeLocalURL';
        } else {
            $func = '';
        }

        if ( $func !== '' ) {
            # NOTE(review): $title stays set after this branch, so the link
            # cache handling at the end also registers it as a link -- confirm
            # that this is intended.
            $title = Title::newFromText( $part1 );
            if ( !is_null( $title ) ) {
                if ( $argc > 0 ) {
                    # The first argument is passed on to the URL method
                    $text = $title->$func( $args[0] );
                } else {
                    $text = $title->$func();
                }
                $found = true;
            }
        }
    }

    # Internal variables
    if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
        $text = $this->mVariables[$part1];
        $found = true;
        $this->mOutput->mContainsOldMagic = true;
    }
/*
    # Arguments input from the caller (disabled)
    $inputArgs = end( $this->mArgStack );
    if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
        $text = $inputArgs[$part1];
        $found = true;
    }
*/
    # Load from database
    if ( !$found ) {
        $title = Title::newFromText( $part1, NS_TEMPLATE );
        if ( !is_null( $title ) && !$title->isExternal() ) {
            # Check for excessive inclusion
            $dbk = $title->getPrefixedDBkey();
            if ( $this->incrementIncludeCount( $dbk ) ) {
                $article = new Article( $title );
                $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
                if ( $articleContent !== false ) {
                    $found = true;
                    $text = $articleContent;
                }
            }

            # If the title is valid but undisplayable, make a link to it
            if ( $this->mOutputType == OT_HTML && !$found ) {
                $text = "[[" . $title->getPrefixedText() . "]]";
                $found = true;
            }
        }
    }

    # Recursive parsing, escaping and link table handling
    # Only for HTML output
    if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
        $text = wfEscapeWikiText( $text );
    } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse ) {
        # Clean up argument array: arguments without "=" are numbered from 1,
        # arguments with "=" are stored under their trimmed name.
        $assocArgs = array();
        $position = 1;
        foreach( $args as $arg ) {
            $eqpos = strpos( $arg, "=" );
            if ( $eqpos === false ) {
                $assocArgs[$position++] = $arg;
            } else {
                # trim() always returns a string, so no check for false is needed
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos+1 ) );
                $assocArgs[$name] = $value;
            }
        }

        # Do not enter included links in link table
        if ( !is_null( $title ) ) {
            $wgLinkCache->suspend();
        }

        # Run full parser on the included text
        $text = $this->stripParse( $text, (bool)$newline, $assocArgs );

        # Resume the link cache and register the inclusion as a link
        if ( !is_null( $title ) ) {
            $wgLinkCache->resume();
            $wgLinkCache->addLinkObj( $title );
        }
    }

    if ( !$found ) {
        return $matches[0];
    }
    return $text;
}
1430
# Triple brace replacement -- used for template arguments.
#
# $matches is a match array for the triple-brace pattern built in
# replaceVariables(): [0] is the whole match, [1] an optional preceding
# newline, [2] the argument name.
#
# When the trimmed name is a key of the argument array on top of
# $this->mArgStack, its value is run through stripParse() and returned.
# Otherwise the matched text is returned unchanged.
function argSubstitution( $matches )
{
    $callerArgs = end( $this->mArgStack );
    $argName = trim( $matches[2] );

    if ( !array_key_exists( $argName, $callerArgs ) ) {
        return $matches[0];
    }

    $hadNewline = (bool)$matches[1];
    return $this->stripParse( $callerArgs[$argName], $hadNewline, array() );
}
1445
# Count one more inclusion of the entity named $dbk and report whether the
# inclusion is still allowed.
#
# Returns true while the running count for $dbk, including this call, does
# not exceed MAX_INCLUDE_REPEAT, and false afterwards.
function incrementIncludeCount( $dbk )
{
    # First time this entity is seen: start its counter
    if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
        $this->mIncludeCount[$dbk] = 0;
    }

    $timesIncluded = ++$this->mIncludeCount[$dbk];

    return ( $timesIncluded <= MAX_INCLUDE_REPEAT );
}
1458
1459
# Cleans up HTML, removes dangerous tags and attributes.
#
# $text  text that may contain HTML
#
# HTML comments are removed. The text is then split at every "<". A piece
# that starts with a whitelisted tag is kept, with its attributes passed
# through fixTagAttributes(); any other piece has its angle brackets escaped.
# With $wgUserHtml off the whitelist is empty, so every tag is escaped.
#
# Without $wgUseTidy, tag nesting is tracked as well:
#  - a closing tag is only accepted when it matches the most recently opened
#    tag (tags from $htmlsingle are not tracked and always accepted),
#  - td/th/tr are only accepted inside a table,
#  - a tag that is already open may only be opened again if it is listed in
#    $htmlnest,
#  - tags still open at the end are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    global $wgUseTidy, $wgUserHtml;
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );

    if( $wgUserHtml ) {
        $htmlpairs = array( # Tags that must be closed
            "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
            "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
            "strike", "strong", "tt", "var", "div", "center",
            "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
            "ruby", "rt" , "rb" , "rp", "p"
        );
        $htmlsingle = array(
            "br", "hr", "li", "dt", "dd"
        );
        $htmlnest = array( # Tags that can be nested--??
            "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
            "dl", "font", "big", "small", "sub", "sup"
        );
        $tabletags = array( # Can only appear inside table
            "td", "th", "tr"
        );
    } else {
        $htmlpairs = array();
        $htmlsingle = array();
        $htmlnest = array();
        $tabletags = array();
    }

    # Table cell and row tags are treated like the other untracked tags
    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # NOTE(review): the result is not used in this function; the call is kept
    # in case getHTMLattrs() has side effects -- confirm and drop if not.
    $htmlattrs = $this->getHTMLattrs () ;

    # Remove HTML comments. A comment alone on its line is removed together
    # with the newline in front of it.
    $text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

    # Subpatterns: optional slash, tag name, attributes, closing bracket
    # (optionally preceded by a slash), text up to the next piece
    $tagRegex = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    if(!$wgUseTidy) {
        # $tagstack holds the open tags of the current table level,
        # $tablestack the saved stacks of the enclosing levels
        $tagstack = array(); $tablestack = array();
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagRegex, $x, $regs ) ) {
                # Not shaped like a tag at all
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
            $t = strtolower( $t );

            $badtag = 0 ;
            if ( in_array( $t, $htmlelements ) ) {
                # Check our stack
                if ( $slash ) {
                    # Closing a tag...
                    $newparams = "";
                    if ( ! in_array( $t, $htmlsingle ) ) {
                        # It has to match the tag opened last
                        $ot = count( $tagstack ) ? array_pop( $tagstack ) : NULL;
                        if ( $ot !== $t ) {
                            # Mismatch: put back whatever was popped
                            if ( !is_null( $ot ) ) {
                                array_push( $tagstack, $ot );
                            }
                            $badtag = 1;
                        } elseif ( $t == "table" ) {
                            # Back to the stack of the enclosing level
                            $tagstack = array_pop( $tablestack );
                        }
                    }
                } else {
                    # Keep track for later
                    if ( in_array( $t, $tabletags ) &&
                    ! in_array( "table", $tagstack ) ) {
                        $badtag = 1;
                    } else if ( in_array( $t, $tagstack ) &&
                    ! in_array ( $t , $htmlnest ) ) {
                        $badtag = 1 ;
                    } else if ( ! in_array( $t, $htmlsingle ) ) {
                        if ( $t == "table" ) {
                            # A table starts a fresh stack
                            array_push( $tablestack, $tagstack );
                            $tagstack = array();
                        }
                        array_push( $tagstack, $t );
                    }
                    # Strip non-approved attributes from the tag
                    $newparams = $this->fixTagAttributes($params);
                }
                if ( ! $badtag ) {
                    $rest = str_replace( ">", "&gt;", $rest );
                    $text .= "<$slash$t $newparams$brace$rest";
                    continue;
                }
            }
            # Unknown or rejected tag: escape it
            $text .= "&lt;" . str_replace( ">", "&gt;", $x);
        }
        # Close off any remaining tags
        while ( $t = array_pop( $tagstack ) ) {
            $text .= "</$t>\n";
            if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
        }
    } else {
        # this might be possible using tidy itself
        foreach ( $bits as $x ) {
            if ( !preg_match( $tagRegex, $x, $regs ) ) {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
                continue;
            }
            list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
            $t = strtolower( $t );
            if ( in_array( $t, $htmlelements ) ) {
                $newparams = $this->fixTagAttributes($params);
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
            } else {
                $text .= "&lt;" . str_replace( ">", "&gt;", $x);
            }
        }
    }
    wfProfileOut( $fname );
    return $text;
}
1576
1577
/*
 * Post-processes the headings in rendered text. It accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an edit link to sections if the parser options ask for it and the
 *    title can be edited by the user
 * 3) Add a table of contents after the first block of text if enabled
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up
 * the string and re-inserts the newly formatted headlines.
 *
 * $text    text containing h1..h6 elements
 * $isMain  when false, the table of contents is not inserted
 *
 * Returns the reassembled text.
 */
/* private */ function formatHeadings( $text, $isMain=true )
{
    global $wgInputEncoding;

    $doNumberHeadings = $this->mOptions->getNumberHeadings();
    $doShowToc = $this->mOptions->getShowToc();
    if( !$this->mTitle->userCanEdit() ) {
        $showEditLink = 0;
        $rightClickHack = 0;
    } else {
        $showEditLink = $this->mOptions->getEditSection();
        $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if( $esw->matchAndRemove( $text ) ) {
        $showEditLink = 0;
    }
    # if the NOTOC magic word occurs in the text, do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if( $mw->matchAndRemove( $text ) ) {
        $doShowToc = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
        $doShowToc = 0;
    }

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links - this is for later, but we need the number of headlines right now.
    # $matches[1] holds the levels, $matches[2] the rest of the opening tag and
    # $matches[3] the heading contents.
    # NOTE(review): the pattern string is written in two pieces, presumably so
    # the source never contains a PHP closing tag sequence -- keep it that way.
    $numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

    # if there are fewer than 4 headlines in the article, do not show TOC
    if( $numMatches < 4 ) {
        $doShowToc = 0;
    }

    # if the FORCETOC magic word occurs in the text,
    # override above conditions and always show TOC
    $mw =& MagicWord::get( MAG_FORCETOC );
    if ($mw->matchAndRemove( $text ) ) {
        $doShowToc = 1;
    }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # headline counter
    $headlineCount = 0;

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    $toclevel = 0;
    $toc = "";
    $full = "";
    $head = array();           # rebuilt heading HTML, by headline index
    $sublevelCount = array();  # headings seen so far, by level
    $refers = array();         # occurrences of each canonized heading
    $level = 0;
    $prevlevel = 0;
    foreach( $matches[3] as $headline ) {
        $numbering = "";
        if( $level ) {
            $prevlevel = $level;
        }
        $level = $matches[1][$headlineCount];
        if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
            # reset when we enter a new level
            $sublevelCount[$level] = 0;
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
            # reset when we step back a level
            $sublevelCount[$level+1]=0;
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        # count number of headlines for each level
        if( !isset( $sublevelCount[$level] ) ) {
            $sublevelCount[$level] = 0;
        }
        $sublevelCount[$level]++;
        if( $doNumberHeadings || $doShowToc ) {
            # Join the non-empty counters of all levels up to this one with dots
            $dot = 0;
            for( $i = 1; $i <= $level; $i++ ) {
                if( !empty( $sublevelCount[$i] ) ) {
                    if( $dot ) {
                        $numbering .= ".";
                    }
                    $numbering .= $sublevelCount[$i];
                    $dot = 1;
                }
            }
        }

        # The canonized header is a version of the header text safe to use for links
        # Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );

        # strip out HTML
        $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
        $tocline = trim( $canonized_headline );
        $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
        # strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
        $canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

        # count occurrences of this heading so we can track dupes in anchors
        if( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $refcount = $refers[$canonized_headline];

        # Prepend the number to the heading text
        if( $doNumberHeadings || $doShowToc ) {
            $tocline = $numbering . " " . $tocline;

            # Don't number the heading if it is the only one (looks silly)
            if( $doNumberHeadings && count( $matches[3] ) > 1) {
                # the two are different if the line contains a link
                $headline=$numbering . " " . $headline;
            }
        }

        # Create the anchor for linking from the TOC to the section;
        # repeated headings get "_2", "_3", ... appended
        $anchor = $canonized_headline;
        if( $refcount > 1 ) {
            $anchor .= "_" . $refcount;
        }
        if( $doShowToc ) {
            $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
        }

        # Assemble the replacement for this heading: optional edit link,
        # then the anchor and the heading itself
        $headHtml = "";
        if( $showEditLink ) {
            $headHtml .= $sk->editSectionLink($headlineCount+1);
        }

        # Add the edit section span
        if( $rightClickHack ) {
            $headline = $sk->editSectionScript($headlineCount+1,$headline);
        }

        # give headline the correct <h#> tag
        $headHtml .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
        $head[$headlineCount] = $headHtml;

        $headlineCount++;
    }

    if( $doShowToc ) {
        # Close the indentation levels that are still open, then wrap the lines
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    # split up and insert constructed headlines
    $blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach( $blocks as $block ) {
        # An [edit] link for the top block of text used to be added here.
        # Disabled because it broke block formatting
        # For example, a bullet point in the top line
        # $full .= $sk->editSectionLink(0);

        $full .= $block;
        if( $doShowToc && !$i && $isMain) {
            # Top anchor now in skin
            $full = $full.$toc;
        }

        # Block number $i is followed by heading number $i, if there is one
        if( !empty( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1776
1777         /* private */ function magicISBN( $text )
1778         {
1779                 global $wgLang;
1780
1781                 $a = split( "ISBN ", " $text" );
1782                 if ( count ( $a ) < 2 ) return $text;
1783                 $text = substr( array_shift( $a ), 1);
1784                 $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1785
1786                 foreach ( $a as $x ) {
1787                         $isbn = $blank = "" ;
1788                         while ( " " == $x{0} ) {
1789                                 $blank .= " ";
1790                                 $x = substr( $x, 1 );
1791                         }
1792                         while ( strstr( $valid, $x{0} ) != false ) {
1793                                 $isbn .= $x{0};
1794                                 $x = substr( $x, 1 );
1795                         }
1796                         $num = str_replace( "-", "", $isbn );
1797                         $num = str_replace( " ", "", $num );
1798
1799                         if ( "" == $num ) {
1800                                 $text .= "ISBN $blank$x";
1801                         } else {
1802                                 $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
1803                                 $text .= "<a href=\"" .
1804                                 $titleObj->escapeLocalUrl( "isbn={$num}" ) .
1805                                         "\" class=\"internal\">ISBN $isbn</a>";
1806                                 $text .= $x;
1807                         }
1808                 }
1809                 return $text;
1810         }
1811         /* private */ function magicRFC( $text )
1812         {
1813                 global $wgLang;
1814
1815                 $a = split( "ISBN ", " $text" );
1816                 if ( count ( $a ) < 2 ) return $text;
1817                 $text = substr( array_shift( $a ), 1);
1818                 $valid = "0123456789";
1819
1820                 foreach ( $a as $x ) {
1821                         $rfc = $blank = "" ;
1822                         while ( " " == $x{0} ) {
1823                                 $blank .= " ";
1824                                 $x = substr( $x, 1 );
1825                         }
1826                         while ( strstr( $valid, $x{0} ) != false ) {
1827                                 $rfc .= $x{0};
1828                                 $x = substr( $x, 1 );
1829                         }
1830
1831                         if ( "" == $rfc ) {
1832                                 $text .= "RFC $blank$x";
1833                         } else {
1834                                 $url = wfmsg( "rfcurl" );
1835                                 $url = str_replace( "$1", $rfc, $url);
1836                                 $sk =& $this->mOptions->getSkin();
1837                                 $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
1838                                 $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
1839                         }
1840                 }
1841                 return $text;
1842         }
1843
1844         function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
1845         {
1846                 $this->mOptions = $options;
1847                 $this->mTitle =& $title;
1848                 $this->mOutputType = OT_WIKI;
1849
1850                 if ( $clearState ) {
1851                         $this->clearState();
1852                 }
1853
1854                 $stripState = false;
1855                 $pairs = array(
1856                         "\r\n" => "\n",
1857                         );
1858                 $text = str_replace(array_keys($pairs), array_values($pairs), $text);
1859                 // now with regexes
1860                 $pairs = array(
1861                         "/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
1862                         "/<br *?>/i" => "<br/>",
1863                 );
1864                 $text = preg_replace(array_keys($pairs), array_values($pairs), $text);
1865                 $text = $this->strip( $text, $stripState, false );
1866                 $text = $this->pstPass2( $text, $user );
1867                 $text = $this->unstrip( $text, $stripState );
1868                 return $text;
1869         }
1870
1871         /* private */ function pstPass2( $text, &$user )
1872         {
1873                 global $wgLang, $wgLocaltimezone, $wgCurParser;
1874
1875                 # Variable replacement
1876                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
1877                 $text = $this->replaceVariables( $text );
1878
1879                 # Signatures
1880                 #
1881                 $n = $user->getName();
1882                 $k = $user->getOption( "nickname" );
1883                 if ( "" == $k ) { $k = $n; }
1884                 if(isset($wgLocaltimezone)) {
1885                         $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
1886                 }
1887                 /* Note: this is an ugly timezone hack for the European wikis */
1888                 $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
1889                   " (" . date( "T" ) . ")";
1890                 if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");
1891
1892                 $text = preg_replace( "/~~~~~/", $d, $text );
1893                 $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText(
1894                   Namespace::getUser() ) . ":$n|$k]] $d", $text );
1895                 $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText(
1896                   Namespace::getUser() ) . ":$n|$k]]", $text );
1897
1898                 # Context links: [[|name]] and [[name (context)|]]
1899                 #
1900                 $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
1901                 $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
1902                 $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
1903                 $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";
1904
1905                 $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
1906                 $p2 = "/\[\[\\|({$tc}+)]]/";                                    # [[|page]]
1907                 $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
1908                 $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
1909                                                                                                                 # [[ns:page (cont)|]]
1910                 $context = "";
1911                 $t = $this->mTitle->getText();
1912                 if ( preg_match( $conpat, $t, $m ) ) {
1913                         $context = $m[2];
1914                 }
1915                 $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
1916                 $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
1917                 $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );
1918
1919                 if ( "" == $context ) {
1920                         $text = preg_replace( $p2, "[[\\1]]", $text );
1921                 } else {
1922                         $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
1923                 }
1924
1925                 /*
1926                 $mw =& MagicWord::get( MAG_SUBST );
1927                 $wgCurParser = $this->fork();
1928                 $text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
1929                 $this->merge( $wgCurParser );
1930                 */
1931
1932                 # Trim trailing whitespace
1933                 # MAG_END (__END__) tag allows for trailing
1934                 # whitespace to be deliberately included
1935                 $text = rtrim( $text );
1936                 $mw =& MagicWord::get( MAG_END );
1937                 $mw->matchAndRemove( $text );
1938
1939                 return $text;
1940         }
1941
1942         # Set up some variables which are usually set up in parse()
1943         # so that an external function can call some class members with confidence
1944         function startExternalParse( &$title, $options, $outputType, $clearState = true )
1945         {
1946                 $this->mTitle =& $title;
1947                 $this->mOptions = $options;
1948                 $this->mOutputType = $outputType;
1949                 if ( $clearState ) {
1950                         $this->clearState();
1951                 }
1952         }
1953
1954         function transformMsg( $text, $options ) {
1955                 global $wgTitle;
1956                 static $executing = false;
1957
1958                 # Guard against infinite recursion
1959                 if ( $executing ) {
1960                         return $text;
1961                 }
1962                 $executing = true;
1963
1964                 $this->mTitle = $wgTitle;
1965                 $this->mOptions = $options;
1966                 $this->mOutputType = OT_MSG;
1967                 $this->clearState();
1968                 $text = $this->replaceVariables( $text );
1969
1970                 $executing = false;
1971                 return $text;
1972         }
1973 }
1974
# Value holder for the result of a parse: the output text, the language
# links and category links, and a flag recording whether "old magic" was
# encountered.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;

        # $text:             output text
        # $languageLinks:    array of language links
        # $categoryLinks:    array of category links
        # $containsOldMagic: boolean flag
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each returns whatever wfSetVar() returns
        # (presumably the previous value -- confirm against wfSetVar)
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. The text is not merged.
        #
        # $other: the ParserOutput to merge from
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Take the category links from $other (this used to append this
                # object's own language links to its category links instead).
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2004
# Bundle of settings that steer a parse. Site-wide switches are copied
# from globals and per-user preferences are read from a user object; see
# initialiseFromUser().
class ParserOptions
{
        # All member variables are private; use the accessors below.
        var $mUseTeX;                    # Expand <math> tags with texvc
        var $mUseCategoryMagic;          # Give [[Category:xxxx]] tags special treatment
        var $mUseDynamicDates;           # Format dates through $wgDateFormatter
        var $mInterwikiMagic;            # Remove interlanguage links and return them in an array
        var $mAllowExternalImages;       # Permit inline external images
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Index of the date format
        var $mEditSection;               # Produce "edit section" links
        var $mEditSectionOnRightClick;   # Emit JavaScript for editing a section on right click
        var $mNumberHeadings;            # Number headings automatically
        var $mShowToc;                   # Display the table of contents

        # Accessor pairs, one pair per option. The setters return whatever
        # wfSetVar() / wfSetRef() return.
        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        function getSkin() { return $this->mSkin; }
        function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }

        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # Build a fresh ParserOptions object initialised from $user.
        /* static */ function newFromUser( &$user )
        {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill in every option: the site-wide switches from the matching
        # globals, the rest from the given user. When $userInput is empty a
        # new User object, marked as loaded, stands in for it.
        function initialiseFromUser( &$userInput )
        {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates;
                global $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site-wide settings
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # Per-user preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( "date" );
                $this->mEditSection = $user->getOption( "editsection" );
                $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
                $this->mNumberHeadings = $user->getOption( "numberheadings" );
                $this->mShowToc = $user->getOption( "showtoc" );
        }

}
2077
# Regex callbacks, used in Parser::replaceVariables
#
# Hands the regex match array to braceSubstitution() on the parser object
# held in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2084
# Regex callback: hands the regex match array to argSubstitution() on the
# parser object held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
        return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2090
2091 ?>