includes/Parser.php

   1 <?php
   2
   3 // require_once('Tokenizer.php');
   4
   5 # PHP Parser
   6 #
   7 # Processes wiki markup
   8 #
   9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
  10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
  11 #
  12 # Globals used:
  13 #    objects:   $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
  14 #
  15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
  16 #
  17 #    settings:  $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
  18 #               $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
  19 #               $wgLocaltimezone
  20 #
  21 #      * only within ParserOptions
  22 #
  23 #
  24 #----------------------------------------
  25 #    Variable substitution O(N^2) attack
  26 #-----------------------------------------
  27 # Without countermeasures, it would be possible to attack the parser by saving a page
  28 # filled with a large number of inclusions of large pages. The size of the generated
  29 # page would be proportional to the square of the input size. Hence, we limit the number
  30 # of inclusions of any given page, thus bringing any attack back to O(N).
  31 #
  32
  # Inclusion limits used against the variable-substitution attack
  # described in the header comment of this file.
  define( 'MAX_INCLUDE_REPEAT', 20 );
  define( 'MAX_INCLUDE_SIZE', 1000000 ); # 1 Million

  # Allowed values for $mOutputType
  define( 'OT_HTML', 1 );
  define( 'OT_WIKI', 2 );
  define( 'OT_MSG', 3 );

  # Pseudo tag name for extractTags(): passing it makes extractTags()
  # pull out <!-- HTML comments --> instead of an <XML>-style tag pair.
  # It should never be something that could be wanted in wiki syntax.
  define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

  # Prefix of the unique markers that stand in for stripped content
  define( 'UNIQ_PREFIX', 'NaodW29' );

  # Building blocks for external link processing

  define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
  define( 'HTTP_PROTOCOLS', 'http|https' );

  # Everything except closing bracket, space, or control characters
  define( 'EXT_LINK_URL_CLASS', '[^]\\x00-\\x20\\x7F]' );
  # The complement of the class above
  define( 'INVERSE_EXT_LINK_URL_CLASS', '[\]\\x00-\\x20\\x7F]' );
  # Like the URL class, but space is allowed
  define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );

  define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
  define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );

  # [url optional text] -- group 1: URL, group 2: protocol, group 3: link text
  define( 'EXT_LINK_BRACKETED',
      '/\[((' . URL_PROTOCOLS . '):' . EXT_LINK_URL_CLASS . '+)' .
      ' *(' . EXT_LINK_TEXT_CLASS . '*?)\]/S'
  );

  # A bare http/https URL whose last path segment ends in an image extension
  define( 'EXT_IMAGE_REGEX',
      '/^(' . HTTP_PROTOCOLS . ':)' .          # Protocol
      '(' . EXT_LINK_URL_CLASS . '+)\\/' .     # Hostname and path
      '(' . EXT_IMAGE_FNAME_CLASS . '+)\\.' .  # Filename
      '((?i)' . EXT_IMAGE_EXTENSIONS . ')$/S'  # Extension, case-insensitive
  );
  68
  69 class Parser
  70 {
  71         # Persistent:
  72         var $mTagHooks;
  73
  74         # Cleared with clearState():
  75         var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
  76         var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
  77
  78         # Temporary:
  79         var $mOptions, $mTitle, $mOutputType,
  80             $mTemplates,        // cache of already loaded templates, avoids
  81                                 // multiple SQL queries for the same string
  82             $mTemplatePath;     // stores an unsorted hash of all the templates already loaded
  83                                 // in this path. Used for loop detection.
  84
  # Constructor (PHP 4 style: named after the class).
  # Starts every persistent/temporary container as an empty array and
  # then initialises the per-parse state through clearState().
  function Parser() {
      foreach ( array( 'mTemplates', 'mTemplatePath', 'mTagHooks' ) as $member ) {
          $this->$member = array();
      }
      $this->clearState();
  }
  91
  # Resets every member listed under "Cleared with clearState()" to its
  # initial value, including a brand-new ParserOutput.
  function clearState() {
      $this->mOutput = new ParserOutput;

      # Counters, flags and markers
      $this->mAutonumber = 0;
      $this->mLastSection = "";
      $this->mDTopen = false;
      $this->mInPre = false;
      $this->mVariables = false;

      # Containers
      $this->mIncludeCount = array();
      $this->mStripState = array();
      $this->mArgStack = array();
  }
 103
 104         # First pass--just handle <nowiki> sections, pass the rest off
 105         # to internalParse() which does all the real work.
 106         #
 107         # Returns a ParserOutput
 108         #
 # Renders wiki markup as HTML and returns the ParserOutput held in
 # $this->mOutput, with the generated HTML stored through setText().
 #
 #   $text        wiki markup to render
 #   $title       kept by reference in $this->mTitle
 #   $options     kept in $this->mOptions
 #   $linestart   handed on to internalParse() and doBlockLevels()
 #   $clearState  when true, clearState() runs before anything else
 #
 # Pipeline: strip() -> internalParse() -> unstrip() -> regex clean-up ->
 # doBlockLevels() -> unstripNoWiki() -> tidy() (only when $wgUseTidy is set).
 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
     global $wgUseTidy;
     $fname = 'Parser::parse';
     wfProfileIn( $fname );

     if ( $clearState ) {
         $this->clearState();
     }

     $this->mOptions = $options;
     $this->mTitle =& $title;
     $this->mOutputType = OT_HTML;

     $text = $this->strip( $text, $this->mStripState );
     $text = $this->internalParse( $text, $linestart );
     $text = $this->unstrip( $text, $this->mStripState );

     # Clean up special characters; run exactly once, right before
     # doBlockLevels(). preg_replace() applies the patterns in array order.
     if ( !$wgUseTidy ) {
         $fixtags = array(
             # french spaces, last one Guillemet-left
             # only if there is something before the space
             '/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
             # french spaces, Guillemet-right
             '/(\\302\\253) /i' => '\\1&nbsp;',
             '/<hr *>/i' => '<hr />',
             '/<br *>/i' => '<br />',
             '/<center *>/i' => '<div class="center">',
             '/<\\/center *>/i' => '</div>',
             # Escape ampersands that do not start an entity or a character
             # reference. The hex class is A-Fa-f: the previous A-f range
             # also accepted G-Z and some punctuation as hex digits, so
             # things like "&#xZZ;" slipped through unescaped. The former
             # ":amp;" alternative was a typo and is covered by the
             # named-entity alternative anyway. Note that we probably ought
             # to be more careful about named entities.
             '/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
         );
     } else {
         $fixtags = array(
             # french spaces, last one Guillemet-left
             '/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
             # french spaces, Guillemet-right
             '/(\\302\\253) /i' => '\\1&nbsp;',
             '/([^> ]+(&#x30(1|3|9);)[^< ]*)/i' => '<span class="diacrit">\\1</span>',
             '/<center *>/i' => '<div class="center">',
             '/<\\/center *>/i' => '</div>'
         );
     }
     $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

     # only once and last
     $text = $this->doBlockLevels( $text, $linestart );
     $text = $this->unstripNoWiki( $text, $this->mStripState );
     if ( $wgUseTidy ) {
         $text = $this->tidy( $text );
     }

     $this->mOutput->setText( $text );
     wfProfileOut( $fname );
     return $this->mOutput;
 }
 165
 # Returns two mt_rand() values in the range 0..0x7fffffff, each rendered
 # as lower-case hexadecimal and joined together (2 to 16 hex digits).
 /* static */ function getRandomString() {
     $first = mt_rand( 0, 0x7fffffff );
     $second = mt_rand( 0, 0x7fffffff );
     return dechex( $first ) . dechex( $second );
 }
 169
 170         # Replaces all occurrences of <$tag>content</$tag> in the text
 171         # with a random marker and returns the new text. The output parameter
 172         # $content will be an associative array filled with data in the form
 173         # $unique_marker => content.
 174
 175         # If $content is already set, the additional entries will be appended
 176
 177         # If $tag is set to STRIP_COMMENTS, the function will extract
 178         # <!-- HTML comments -->
 179
 # Cuts every <$tag>...</$tag> section out of $text and puts a unique
 # marker in its place.
 #
 #   $tag          tag name, or STRIP_COMMENTS to extract <!-- ... --> instead
 #   $text         text to scan
 #   $content      in/out: receives marker => inner text; existing entries
 #                 are kept and new ones appended
 #   $uniq_prefix  prepended to every marker
 #
 # Returns $text with the sections replaced by their markers. An opening
 # tag without a closing tag swallows the rest of the text. An opening tag
 # with nothing at all after it is dropped without producing a marker.
 /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
     $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
     if ( !$content ) {
         $content = array( );
     }

     # Build both patterns once. The tag name is quoted so that names
     # containing regex metacharacters (or the "/" delimiter) match
     # literally instead of altering or breaking the pattern.
     if ( (string)$tag === STRIP_COMMENTS ) {
         $openPattern = '/<!--/i';
         $closePattern = '/-->/i';
     } else {
         $quotedTag = preg_quote( $tag, '/' );
         $openPattern = "/<\\s*$quotedTag\\s*>/i";
         $closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
     }

     $n = 1;
     $stripped = '';

     while ( '' != $text ) {
         $p = preg_split( $openPattern, $text, 2 );
         $stripped .= $p[0];
         if ( ( count( $p ) < 2 ) || ( '' === $p[1] ) ) {
             # No further opening tag, or nothing follows it
             $text = '';
         } else {
             $q = preg_split( $closePattern, $p[1], 2 );
             $marker = $rnd . sprintf( '%08X', $n++ );
             $content[$marker] = $q[0];
             $stripped .= $marker;
             # Without a closing tag there is no second piece; stop
             # cleanly rather than reading an undefined offset.
             $text = isset( $q[1] ) ? $q[1] : '';
         }
     }
     return $stripped;
 }
 211
 212         # Strips and renders <nowiki>, <pre>, <math>, <hiero>
 213         # If $render is set, performs necessary rendering operations on plugins
 214         # Returns the text, and fills an array with data needed in unstrip()
 215         # If the $state is already a valid strip state, it adds to the state
 216
 217         # When $stripcomments is set, HTML comments <!-- like this -->
 218         # will be stripped in addition to other tags. This is important
 219         # for section editing, where these comments cause confusion when
 220         # counting the sections in the wikisource
 # Replaces <html> (only when $wgRawHtml is set), <nowiki>, <math>, <pre>,
 # optionally HTML comments, and every tag registered in $this->mTagHooks
 # with unique markers, and records what each marker stands for.
 #
 #   $text           text to process
 #   $state          in/out strip state: arrays of marker => replacement,
 #                   keyed by 'html', 'nowiki', 'math', 'pre', 'comment' and
 #                   by extension tag name. A non-empty state is merged into.
 #   $stripcomments  also extract <!-- ... --> comments
 #
 # When $this->mOutputType is OT_HTML the recorded replacement is the
 # rendered form; otherwise it is the original markup including its tags,
 # so that unstripping restores the source text.
 #
 # Returns the text with markers in place of the extracted sections.
 function strip( $text, &$state, $stripcomments = false ) {
     global $wgRawHtml;

     $render = ( $this->mOutputType == OT_HTML );
     $uniq_prefix = UNIQ_PREFIX;

     $html_content = array();
     $nowiki_content = array();
     $math_content = array();
     $pre_content = array();
     $comment_content = array();
     $ext_content = array();

     # html: when rendering, the content is kept raw and unchecked for validity
     if ( $wgRawHtml ) {
         $text = Parser::extractTags( 'html', $text, $html_content, $uniq_prefix );
         if ( !$render ) {
             foreach ( $html_content as $marker => $content ) {
                 $html_content[$marker] = "<html>$content</html>";
             }
         }
     }

     # nowiki
     $text = Parser::extractTags( 'nowiki', $text, $nowiki_content, $uniq_prefix );
     foreach ( $nowiki_content as $marker => $content ) {
         if ( $render ) {
             $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
         } else {
             $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
         }
     }

     # math
     $text = Parser::extractTags( 'math', $text, $math_content, $uniq_prefix );
     foreach ( $math_content as $marker => $content ) {
         if ( !$render ) {
             $math_content[$marker] = "<math>$content</math>";
         } elseif ( $this->mOptions->getUseTeX() ) {
             $math_content[$marker] = renderMath( $content );
         } else {
             # TeX is off: show the source literally. The closing tag now
             # has its slash (it used to come out as a second "<math>"),
             # and the content is escaped the same way as nowiki/pre
             # because parse() puts strip markers back only after
             # internalParse() has run.
             $math_content[$marker] =
                 '&lt;math&gt;' . wfEscapeHTMLTagsOnly( $content ) . '&lt;/math&gt;';
         }
     }

     # pre
     $text = Parser::extractTags( 'pre', $text, $pre_content, $uniq_prefix );
     foreach ( $pre_content as $marker => $content ) {
         if ( $render ) {
             $pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
         } else {
             $pre_content[$marker] = "<pre>$content</pre>";
         }
     }

     # Comments
     if ( $stripcomments ) {
         $text = Parser::extractTags( STRIP_COMMENTS, $text, $comment_content, $uniq_prefix );
         foreach ( $comment_content as $marker => $content ) {
             $comment_content[$marker] = "<!--$content-->";
         }
     }

     # Extensions
     foreach ( $this->mTagHooks as $tag => $callback ) {
         # (this initialisation used to be written to a misspelled variable)
         $ext_content[$tag] = array();
         $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
         foreach ( $ext_content[$tag] as $marker => $content ) {
             if ( $render ) {
                 $ext_content[$tag][$marker] = $callback( $content );
             } else {
                 $ext_content[$tag][$marker] = "<$tag>$content</$tag>";
             }
         }
     }

     $core_content = array(
         'html' => $html_content,
         'nowiki' => $nowiki_content,
         'math' => $math_content,
         'pre' => $pre_content,
         'comment' => $comment_content,
     );

     if ( $state ) {
         # Merge into the pre-existing state. A key the state does not
         # have yet is created instead of being added to an undefined
         # value: insertStripItem() builds a state without 'comment', and
         # content for an extension tag seen for the first time used to
         # be thrown away here.
         foreach ( array( $core_content, $ext_content ) as $group ) {
             foreach ( $group as $key => $items ) {
                 if ( isset( $state[$key] ) && is_array( $state[$key] ) ) {
                     $state[$key] = $state[$key] + $items;
                 } else {
                     $state[$key] = $items;
                 }
             }
         }
     } else {
         $state = $core_content + $ext_content;
     }
     return $text;
 }
 327
 328         # always call unstripNoWiki() after this one
 # Puts stripped content back in place of its markers for every entry of
 # the strip state except 'nowiki' and 'html'; those two are restored by
 # unstripNoWiki(), which must always be called after this one.
 #
 #   $text   text containing markers
 #   $state  strip state: arrays of marker => replacement (not modified)
 #
 # Returns the text with the markers expanded. A state that is not an
 # array, or an entry that is not an array, is ignored instead of being
 # walked with end()/prev(), which never terminates on such input.
 function unstrip( $text, &$state ) {
     if ( !is_array( $state ) ) {
         return $text;
     }

     # Must expand in reverse order, otherwise nested tags will be corrupted
     foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
         # Compare as strings so a numeric key is never mistaken for one
         # of the two names handled by unstripNoWiki()
         $type = (string)$type;
         if ( $type === 'nowiki' || $type === 'html' || !is_array( $contentDict ) ) {
             continue;
         }
         foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
             $text = str_replace( $marker, $content, $text );
         }
     }

     return $text;
 }
 342         # always call this after unstrip() to preserve the order
 # Restores the 'nowiki' entries of the strip state and, when $wgRawHtml
 # is set, the 'html' entries as well. Always call this after unstrip()
 # to preserve the order.
 #
 #   $text   text containing markers
 #   $state  strip state: arrays of marker => replacement (not modified)
 #
 # Returns the text with those markers expanded. A missing or non-array
 # 'nowiki'/'html' entry is skipped instead of being walked with
 # end()/prev(), which never terminates on such input.
 function unstripNoWiki( $text, &$state ) {
     global $wgRawHtml;

     $types = array( 'nowiki' );
     if ( $wgRawHtml ) {
         $types[] = 'html';
     }

     foreach ( $types as $type ) {
         if ( !isset( $state[$type] ) || !is_array( $state[$type] ) ) {
             continue;
         }
         # Must expand in reverse order, otherwise nested tags will be corrupted
         foreach ( array_reverse( $state[$type], true ) as $marker => $content ) {
             $text = str_replace( $marker, $content, $text );
         }
     }

     return $text;
 }
 358
 359         # Add an item to the strip state
 360         # Returns the unique tag which must be inserted into the stripped text
 361         # The tag will be replaced with the original text in unstrip()
 362
 # Adds one item to the strip state.
 #
 #   $text   content that should replace the marker when unstripping
 #   $state  in/out strip state; an empty state is initialised first
 #
 # Returns the unique marker which must be inserted into the stripped
 # text. unstrip() replaces it with $text again.
 function insertStripItem( $text, &$state ) {
     $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
     if ( !$state ) {
         # Create every key that strip() merges into, 'comment' included,
         # plus the 'item' list this function fills.
         $state = array(
             'html' => array(),
             'nowiki' => array(),
             'math' => array(),
             'pre' => array(),
             'comment' => array(),
             'item' => array()
         );
     }
     $state['item'][$rnd] = $text;
     return $rnd;
 }
 376
 377         # categoryMagic
 378         # generate a list of subcategories and pages for a category
 379         # depending on wfMsg("usenewcategorypage") it either calls the new
 380         # or the old code. The new code will not work properly for some
 381         # languages due to sorting issues, so they might want to turn it
 382         # off.
 # Dispatches to the old or the new category listing code: when the text
 # of the 'usenewcategorypage' message starts with '0' the old code runs,
 # otherwise the new code.
 function categoryMagic() {
     $msg = wfMsg( 'usenewcategorypage' );
     $useOldCode = ( '0' == @$msg[0] );
     return $useOldCode
         ? $this->oldCategoryMagic()
         : $this->newCategoryMagic();
 }
 392
 393         # This method generates the list of subcategories and pages for a category
 # Old-style category listing: subcategories and member pages, each as a
 # comma-separated run of links under an <h2> heading.
 #
 # Returns nothing when category magic is disabled in the parser options,
 # '' when the current title is not in the category namespace, and the
 # generated HTML otherwise.
 function oldCategoryMagic () {
     global $wgLang;
     $fname = 'Parser::oldCategoryMagic';

     if ( !$this->mOptions->getUseCategoryMagic() ) {
         return; # Doesn't use categories at all
     }
     if ( $this->mTitle->getNamespace() != NS_CATEGORY ) {
         return ""; # This ain't a category page
     }

     $sk =& $this->mOptions->getSkin();
     # The value is not used below; the call is kept as it was.
     $id = $this->mTitle->getArticleID();

     # FIXME: add limits
     $dbr =& wfGetDB( DB_SLAVE );
     $cur = $dbr->tableName( 'cur' );
     $categorylinks = $dbr->tableName( 'categorylinks' );
     $dbKey = $dbr->strencode( $this->mTitle->getDBKey() );

     $sql = "SELECT DISTINCT cur_title,cur_namespace FROM $cur,$categorylinks " .
         "WHERE cl_to='$dbKey' AND cl_from=cur_id ORDER BY cl_sortkey";
     $res = $dbr->query( $sql, $fname );

     # Read the complete result first, then build the links
     $rows = array();
     while ( $row = $dbr->fetchObject( $res ) ) {
         $rows[] = $row;
     }

     $subcatLinks = array();
     $pageLinks = array();
     foreach ( $rows as $row ) {
         # Prefixed title: "Namespace:Title", or just "Title"
         $prefixed = $wgLang->getNsText( $row->cur_namespace );
         if ( $prefixed != "" ) {
             $prefixed .= ":";
         }
         $prefixed .= $row->cur_title;

         $link = $sk->makeLink( $prefixed );
         if ( $row->cur_namespace == NS_CATEGORY ) {
             $subcatLinks[] = $link; # Subcategory
         } else {
             $pageLinks[] = $link;   # Page in this category
         }
     }
     $dbr->freeResult( $res );

     $r = "<br style=\"clear:both;\"/>\n";

     # Subcategories
     if ( count( $subcatLinks ) > 0 ) {
         $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
         $r .= implode( ', ', $subcatLinks );
     }

     # Pages in this category
     if ( count( $pageLinks ) > 0 ) {
         $heading = wfMsg( 'category_header', $this->mTitle->getText() );
         $r .= "<h2>{$heading}</h2>\n";
         $r .= implode( ', ', $pageLinks );
     }

     return $r;
 }
 454
 455
 456
 # New-style category listing: subcategories and member pages, each
 # grouped under <h3> headings by the first character of the sort key.
 # Lists with more than six entries are laid out in three table columns.
 #
 # Returns nothing when category magic is disabled in the parser options,
 # '' when the current title is not in the category namespace, and the
 # generated HTML otherwise. The "pages" heading and its count message are
 # always emitted, even when the category has no member pages.
 function newCategoryMagic () {
     global $wgLang;
     $fname = 'Parser::newCategoryMagic';

     if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all

     if ( $this->mTitle->getNamespace() != NS_CATEGORY ) return '' ; # This ain't a category page

     $r = "<br style=\"clear:both;\"/>\n";

     $sk =& $this->mOptions->getSkin();

     $articles = array();
     $articles_start_char = array();
     $children = array();
     $children_start_char = array();

     # FIXME: add limits
     $dbr =& wfGetDB( DB_SLAVE );
     $cur = $dbr->tableName( 'cur' );
     $categorylinks = $dbr->tableName( 'categorylinks' );

     $t = $dbr->strencode( $this->mTitle->getDBKey() );
     $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM " .
         "$cur,$categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey";
     # Pass the caller name, as oldCategoryMagic() does
     $res = $dbr->query( $sql, $fname );
     while ( $x = $dbr->fetchObject( $res ) ) {
         # Prefixed title: "Namespace:Title", or just "Title"
         $ns = $wgLang->getNsText( $x->cur_namespace );
         $t = $ns;
         if ( $t != '' ) {
             $t .= ':';
         }
         $t .= $x->cur_title;

         if ( $x->cur_namespace == NS_CATEGORY ) {
             $ctitle = str_replace( '_', ' ', $x->cur_title );
             $children[] = $sk->makeKnownLink( $t, $ctitle ); # Subcategory

             // If there's a link from Category:A to Category:B, the sortkey of the resulting
             // entry in the categorylinks table is Category:A, not A, which it SHOULD be.
             // Workaround: If sortkey == "Category:".$title, then use $title for sorting,
             // else use sortkey...
             if ( ( $ns . ':' . $ctitle ) == $x->cl_sortkey ) {
                 $children_start_char[] = $wgLang->firstChar( $x->cur_title );
             } else {
                 $children_start_char[] = $wgLang->firstChar( $x->cl_sortkey );
             }
         } else {
             $articles[] = $sk->makeKnownLink( $t ); # Page in this category
             $articles_start_char[] = $wgLang->firstChar( $x->cl_sortkey );
         }
     }
     $dbr->freeResult( $res );

     $ti = $this->mTitle->getText();

     # Don't show subcategories section if there are none.
     $numchild = count( $children );
     if ( $numchild > 0 ) {
         $r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
         if ( $numchild == 1 ) {
             $r .= wfMsg( 'subcategorycount1', 1 );
         } else {
             $r .= wfMsg( 'subcategorycount', $numchild );
         }
         $r .= $this->formatCategoryList( $children, $children_start_char );
     }

     # Pages in this category
     $r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
     $numart = count( $articles );
     if ( $numart == 1 ) {
         $r .= wfMsg( 'categoryarticlecount1', 1 );
     } else {
         $r .= wfMsg( 'categoryarticlecount', $numart );
     }
     $r .= $this->formatCategoryList( $articles, $articles_start_char );

     return $r;
 }

 # Helper for newCategoryMagic(): renders links grouped under <h3>
 # first-character headings.
 #
 #   $items       list of ready-made HTML links, 0-indexed
 #   $startChars  heading character for the item at the same index
 #
 # Returns '' for an empty list, one <ul> sequence for up to six items,
 # and a three-column table for longer lists.
 /* private */ function formatCategoryList( $items, $startChars ) {
     $count = count( $items );
     if ( $count == 0 ) {
         return '';
     }

     if ( $count <= 6 ) {
         // for short lists: a single run of headings and bullet lists
         $out = "<h3>{$startChars[0]}</h3>\n";
         $out .= '<ul><li>' . $items[0] . '</li>';
         for ( $index = 1; $index < $count; $index++ ) {
             if ( $startChars[$index] != $startChars[$index - 1] ) {
                 $out .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
             }
             $out .= "<li>{$items[$index]}</li>";
         }
         $out .= '</ul>';
         return $out;
     }

     // divide list into three chunks: the first holds $chunk items, the
     // following two up to $chunk + 1 each, which always covers the list
     $chunk = (int) ( $count / 3 );
     $startChunk = 0;
     $endChunk = $chunk;

     $out = '<table width="100%"><tr valign="top">';
     for ( $chunkIndex = 0; $chunkIndex < 3; $chunkIndex++ ) {
         $out .= '<td><ul>';
         for ( $index = $startChunk; $index < $endChunk && $index < $count; $index++ ) {
             // New heading at the beginning of a chunk or when the starting
             // letter changes. The chunk-start test comes first so that
             // index 0 is never compared with the non-existent index -1.
             if ( $index == $startChunk
                 || $startChars[$index] != $startChars[$index - 1] )
             {
                 $out .= "</ul><h3>{$startChars[$index]}</h3>\n<ul>";
             }
             $out .= "<li>{$items[$index]}</li>";
         }
         $out .= '</ul></td>';

         $startChunk = $endChunk;
         $endChunk += $chunk + 1;
     }
     $out .= '</tr></table>';

     return $out;
 }
 649
 650         # Return allowed HTML attributes
 651         function getHTMLattrs () {
 652                 $htmlattrs = array( # Allowed attributes--no scripting, etc.
 653                                 'title', 'align', 'lang', 'dir', 'width', 'height',
 654                                 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */
 655                                 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color',
 656                                 /* FONT */ 'type', 'start', 'value', 'compact',
 657                                 /* For various lists, mostly deprecated but safe */
 658                                 'summary', 'width', 'border', 'frame', 'rules',
 659                                 'cellspacing', 'cellpadding', 'valign', 'char',
 660                                 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
 661                                 'headers', 'scope', 'rowspan', 'colspan', /* Tables */
 662                                 'id', 'class', 'name', 'style' /* For CSS */
 663                                 );
 664                 return $htmlattrs ;
 665         }
 666
 667         # Remove non approved attributes and javascript in css
 668         function fixTagAttributes ( $t ) {
 669                 if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)
 670                 $htmlattrs = $this->getHTMLattrs() ;
 671
 672                 # Strip non-approved attributes from the tag
 673                 $t = preg_replace(
 674                         '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e',
 675                         "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')",
 676                         $t);
 677
 678                 $t = str_replace ( "<></>" , "" , $t ) ; # This should fix bug 980557
 679
 680                 # Strip javascript "expression" from stylesheets. Brute force approach:
 681                 # If anythin offensive is found, all attributes of the HTML tag are dropped
 682
 683                 if( preg_match(
 684                         '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
 685                         wfMungeToUtf8( $t ) ) )
 686                 {
 687                         $t='';
 688                 }
 689
 690                 return trim ( $t ) ;
 691         }
 692
 693         # interface with html tidy, used if $wgUseTidy = true
 694         function tidy ( $text ) {
 695                 global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
 696                 global $wgInputEncoding, $wgOutputEncoding;
 697                 $fname = 'Parser::tidy';
 698                 wfProfileIn( $fname );
 699
 700                 $cleansource = '';
 701                 switch(strtoupper($wgOutputEncoding)) {
 702                         case 'ISO-8859-1':
 703                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
 704                                 break;
 705                         case 'UTF-8':
 706                                 $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
 707                                 break;
 708                         default:
 709                                 $wgTidyOpts .= ' -raw';
 710                         }
 711
 712                 $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
 713 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
 714 '<head><title>test</title></head><body>'.$text.'</body></html>';
 715                 $descriptorspec = array(
 716                         0 => array('pipe', 'r'),
 717                         1 => array('pipe', 'w'),
 718                         2 => array('file', '/dev/null', 'a')
 719                 );
 720                 $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes);
 721                 if (is_resource($process)) {
 722                         fwrite($pipes[0], $wrappedtext);
 723                         fclose($pipes[0]);
 724                         while (!feof($pipes[1])) {
 725                                 $cleansource .= fgets($pipes[1], 1024);
 726                         }
 727                         fclose($pipes[1]);
 728                         $return_value = proc_close($process);
 729                 }
 730
 731                 wfProfileOut( $fname );
 732
 733                 if( $cleansource == '' && $text != '') {
 734                         wfDebug( "Tidy error detected!\n" );
 735                         return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
 736                 } else {
 737                         return $cleansource;
 738                 }
 739         }
 740
 741         # parse the wiki syntax used to render tables
 742         function doTableStuff ( $t ) {
 743                 $fname = 'Parser::doTableStuff';
 744                 wfProfileIn( $fname );
 745
 746                 $t = explode ( "\n" , $t ) ;
 747                 $td = array () ; # Is currently a td tag open?
 748                 $ltd = array () ; # Was it TD or TH?
 749                 $tr = array () ; # Is currently a tr tag open?
 750                 $ltr = array () ; # tr attributes
 751                 $indent_level = 0; # indent level of the table
 752                 foreach ( $t AS $k => $x )
 753                 {
 754                         $x = trim ( $x ) ;
 755                         $fc = substr ( $x , 0 , 1 ) ;
 756                         if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) )
 757                         {
 758                                 $indent_level = strlen( $matches[1] );
 759                                 $t[$k] = "\n" .
 760                                         str_repeat( "<dl><dd>", $indent_level ) .
 761                                         "<table " . $this->fixTagAttributes ( $matches[2] ) . '>' ;
 762                                 array_push ( $td , false ) ;
 763                                 array_push ( $ltd , '' ) ;
 764                                 array_push ( $tr , false ) ;
 765                                 array_push ( $ltr , '' ) ;
 766                         }
 767                         else if ( count ( $td ) == 0 ) { } # Don't do any of the following
 768                         else if ( '|}' == substr ( $x , 0 , 2 ) )
 769                         {
 770                                 $z = "</table>\n" ;
 771                                 $l = array_pop ( $ltd ) ;
 772                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 773                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 774                                 array_pop ( $ltr ) ;
 775                                 $t[$k] = $z . str_repeat( "</dd></dl>", $indent_level );
 776                         }
 777                         else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
 778                         {
 779                                 $x = substr ( $x , 1 ) ;
 780                                 while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
 781                                 $z = '' ;
 782                                 $l = array_pop ( $ltd ) ;
 783                                 if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
 784                                 if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 785                                 array_pop ( $ltr ) ;
 786                                 $t[$k] = $z ;
 787                                 array_push ( $tr , false ) ;
 788                                 array_push ( $td , false ) ;
 789                                 array_push ( $ltd , '' ) ;
 790                                 array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
 791                         }
 792                         else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
 793                         {
 794                                 if ( '|+' == substr ( $x , 0 , 2 ) )
 795                                 {
 796                                         $fc = '+' ;
 797                                         $x = substr ( $x , 1 ) ;
 798                                 }
 799                                 $after = substr ( $x , 1 ) ;
 800                                 if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
 801                                 $after = explode ( '||' , $after ) ;
 802                                 $t[$k] = '' ;
 803                                 foreach ( $after AS $theline )
 804                                 {
 805                                         $z = '' ;
 806                                         if ( $fc != '+' )
 807                                         {
 808                                                 $tra = array_pop ( $ltr ) ;
 809                                                 if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
 810                                                 array_push ( $tr , true ) ;
 811                                                 array_push ( $ltr , '' ) ;
 812                                         }
 813
 814                                         $l = array_pop ( $ltd ) ;
 815                                         if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
 816                                         if ( $fc == '|' ) $l = 'td' ;
 817                                         else if ( $fc == '!' ) $l = 'th' ;
 818                                         else if ( $fc == '+' ) $l = 'caption' ;
 819                                         else $l = '' ;
 820                                         array_push ( $ltd , $l ) ;
 821                                         $y = explode ( '|' , $theline , 2 ) ;
 822                                         if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
 823                                         else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
 824                                         $t[$k] .= $y ;
 825                                         array_push ( $td , true ) ;
 826                                 }
 827                         }
 828                 }
 829
 830                 # Closing open td, tr && table
 831                 while ( count ( $td ) > 0 )
 832                 {
 833                         if ( array_pop ( $td ) ) $t[] = '</td>' ;
 834                         if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
 835                         $t[] = '</table>' ;
 836                 }
 837
 838                 $t = implode ( "\n" , $t ) ;
 839                 #               $t = $this->removeHTMLtags( $t );
 840                 wfProfileOut( $fname );
 841                 return $t ;
 842         }
 843
 844         # Parses the text and adds the result to the strip state
 845         # Returns the strip tag
 846         function stripParse( $text, $newline, $args )
 847         {
 848                 $text = $this->strip( $text, $this->mStripState );
 849                 $text = $this->internalParse( $text, (bool)$newline, $args, false );
 850                 return $newline.$this->insertStripItem( $text, $this->mStripState );
 851         }
 852
 853         function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
 854                 $fname = 'Parser::internalParse';
 855                 wfProfileIn( $fname );
 856
 857                 $text = $this->removeHTMLtags( $text );
 858                 $text = $this->replaceVariables( $text, $args );
 859
 860                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
 861
 862                 $text = $this->doHeadings( $text );
 863                 if($this->mOptions->getUseDynamicDates()) {
 864                         global $wgDateFormatter;
 865                         $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
 866                 }
 867                 $text = $this->doAllQuotes( $text );
 868                 $text = $this->replaceExternalLinks( $text );
 869                 $text = $this->doMagicLinks( $text );
 870                 $text = $this->replaceInternalLinks ( $text );
 871                 $text = $this->replaceInternalLinks ( $text );
 872
 873                 $text = $this->unstrip( $text, $this->mStripState );
 874                 $text = $this->unstripNoWiki( $text, $this->mStripState );
 875
 876                 $text = $this->doTableStuff( $text );
 877                 $text = $this->formatHeadings( $text, $isMain );
 878                 $sk =& $this->mOptions->getSkin();
 879                 $text = $sk->transformContent( $text );
 880
 881                 if ( $isMain && !isset ( $this->categoryMagicDone ) ) {
 882                         $text .= $this->categoryMagic () ;
 883                         $this->categoryMagicDone = true ;
 884                 }
 885
 886                 wfProfileOut( $fname );
 887                 return $text;
 888         }
 889
 890         /* private */ function &doMagicLinks( &$text ) {
 891                 $text = $this->magicISBN( $text );
 892                 $text = $this->magicGEO( $text );
 893                 $text = $this->magicRFC( $text );
 894                 return $text;
 895         }
 896
 897         # Parse ^^ tokens and return html
 898         /* private */ function doExponent ( $text )
 899         {
 900                 $fname = 'Parser::doExponent';
 901                 wfProfileIn( $fname);
 902                 $text = preg_replace('/\^\^(.*)\^\^/','<small><sup>\\1</sup></small>', $text);
 903                 wfProfileOut( $fname);
 904                 return $text;
 905         }
 906
 907     # Parse headers and return html
 908         /* private */ function doHeadings( $text ) {
 909                 $fname = 'Parser::doHeadings';
 910                 wfProfileIn( $fname );
 911                 for ( $i = 6; $i >= 1; --$i ) {
 912                         $h = substr( '======', 0, $i );
 913                         $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m",
 914                           "<h{$i}>\\1</h{$i}>\\2", $text );
 915                 }
 916                 wfProfileOut( $fname );
 917                 return $text;
 918         }
 919
 920         /* private */ function doAllQuotes( $text ) {
 921                 $fname = 'Parser::doAllQuotes';
 922                 wfProfileIn( $fname );
 923                 $outtext = '';
 924                 $lines = explode( "\n", $text );
 925                 foreach ( $lines as $line ) {
 926                         $outtext .= $this->doQuotes ( $line ) . "\n";
 927                 }
 928                 $outtext = substr($outtext, 0,-1);
 929                 wfProfileOut( $fname );
 930                 return $outtext;
 931         }
 932
 933         /* private */ function doQuotes( $text ) {
 934                 $arr = preg_split ("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 935                 if (count ($arr) == 1)
 936                         return $text;
 937                 else
 938                 {
 939                         # First, do some preliminary work. This may shift some apostrophes from
 940                         # being mark-up to being text. It also counts the number of occurrences
 941                         # of bold and italics mark-ups.
 942                         $i = 0;
 943                         $numbold = 0;
 944                         $numitalics = 0;
 945                         foreach ($arr as $r)
 946                         {
 947                                 if (($i % 2) == 1)
 948                                 {
 949                                         # If there are ever four apostrophes, assume the first is supposed to
 950                                         # be text, and the remaining three constitute mark-up for bold text.
 951                                         if (strlen ($arr[$i]) == 4)
 952                                         {
 953                                                 $arr[$i-1] .= "'";
 954                                                 $arr[$i] = "'''";
 955                                         }
 956                                         # If there are more than 5 apostrophes in a row, assume they're all
 957                                         # text except for the last 5.
 958                                         else if (strlen ($arr[$i]) > 5)
 959                                         {
 960                                                 $arr[$i-1] .= str_repeat ("'", strlen ($arr[$i]) - 5);
 961                                                 $arr[$i] = "'''''";
 962                                         }
 963                                         # Count the number of occurrences of bold and italics mark-ups.
 964                                         # We are not counting sequences of five apostrophes.
 965                                         if (strlen ($arr[$i]) == 2) $numitalics++;  else
 966                                         if (strlen ($arr[$i]) == 3) $numbold++;     else
 967                                         if (strlen ($arr[$i]) == 5) { $numitalics++; $numbold++; }
 968                                 }
 969                                 $i++;
 970                         }
 971
 972                         # If there is an odd number of both bold and italics, it is likely
 973                         # that one of the bold ones was meant to be an apostrophe followed
 974                         # by italics. Which one we cannot know for certain, but it is more
 975                         # likely to be one that has a single-letter word before it.
 976                         if (($numbold % 2 == 1) && ($numitalics % 2 == 1))
 977                         {
 978                                 $i = 0;
 979                                 $firstsingleletterword = -1;
 980                                 $firstmultiletterword = -1;
 981                                 $firstspace = -1;
 982                                 foreach ($arr as $r)
 983                                 {
 984                                         if (($i % 2 == 1) and (strlen ($r) == 3))
 985                                         {
 986                                                 $x1 = substr ($arr[$i-1], -1);
 987                                                 $x2 = substr ($arr[$i-1], -2, 1);
 988                                                 if ($x1 == " ") {
 989                                                         if ($firstspace == -1) $firstspace = $i;
 990                                                 } else if ($x2 == " ") {
 991                                                         if ($firstsingleletterword == -1) $firstsingleletterword = $i;
 992                                                 } else {
 993                                                         if ($firstmultiletterword == -1) $firstmultiletterword = $i;
 994                                                 }
 995                                         }
 996                                         $i++;
 997                                 }
 998
 999                                 # If there is a single-letter word, use it!
1000                                 if ($firstsingleletterword > -1)
1001                                 {
1002                                         $arr [ $firstsingleletterword ] = "''";
1003                                         $arr [ $firstsingleletterword-1 ] .= "'";
1004                                 }
1005                                 # If not, but there's a multi-letter word, use that one.
1006                                 else if ($firstmultiletterword > -1)
1007                                 {
1008                                         $arr [ $firstmultiletterword ] = "''";
1009                                         $arr [ $firstmultiletterword-1 ] .= "'";
1010                                 }
1011                                 # ... otherwise use the first one that has neither.
1012                                 # (notice that it is possible for all three to be -1 if, for example,
1013                                 # there is only one pentuple-apostrophe in the line)
1014                                 else if ($firstspace > -1)
1015                                 {
1016                                         $arr [ $firstspace ] = "''";
1017                                         $arr [ $firstspace-1 ] .= "'";
1018                                 }
1019                         }
1020
1021                         # Now let's actually convert our apostrophic mush to HTML!
1022                         $output = '';
1023                         $buffer = '';
1024                         $state = '';
1025                         $i = 0;
1026                         foreach ($arr as $r)
1027                         {
1028                                 if (($i % 2) == 0)
1029                                 {
1030                                         if ($state == 'both')
1031                                                 $buffer .= $r;
1032                                         else
1033                                                 $output .= $r;
1034                                 }
1035                                 else
1036                                 {
1037                                         if (strlen ($r) == 2)
1038                                         {
1039                                                 if ($state == 'em')
1040                                                 { $output .= "</em>"; $state = ''; }
1041                                                 else if ($state == 'strongem')
1042                                                 { $output .= "</em>"; $state = 'strong'; }
1043                                                 else if ($state == 'emstrong')
1044                                                 { $output .= "</strong></em><strong>"; $state = 'strong'; }
1045                                                 else if ($state == 'both')
1046                                                 { $output .= "<strong><em>{$buffer}</em>"; $state = 'strong'; }
1047                                                 else # $state can be 'strong' or ''
1048                                                 { $output .= "<em>"; $state .= 'em'; }
1049                                         }
1050                                         else if (strlen ($r) == 3)
1051                                         {
1052                                                 if ($state == 'strong')
1053                                                 { $output .= "</strong>"; $state = ''; }
1054                                                 else if ($state == 'strongem')
1055                                                 { $output .= "</em></strong><em>"; $state = 'em'; }
1056                                                 else if ($state == 'emstrong')
1057                                                 { $output .= "</strong>"; $state = 'em'; }
1058                                                 else if ($state == 'both')
1059                                                 { $output .= "<em><strong>{$buffer}</strong>"; $state = 'em'; }
1060                                                 else # $state can be 'em' or ''
1061                                                 { $output .= "<strong>"; $state .= 'strong'; }
1062                                         }
1063                                         else if (strlen ($r) == 5)
1064                                         {
1065                                                 if ($state == 'strong')
1066                                                 { $output .= "</strong><em>"; $state = 'em'; }
1067                                                 else if ($state == 'em')
1068                                                 { $output .= "</em><strong>"; $state = 'strong'; }
1069                                                 else if ($state == 'strongem')
1070                                                 { $output .= "</em></strong>"; $state = ''; }
1071                                                 else if ($state == 'emstrong')
1072                                                 { $output .= "</strong></em>"; $state = ''; }
1073                                                 else if ($state == 'both')
1074                                                 { $output .= "<em><strong>{$buffer}</strong></em>"; $state = ''; }
1075                                                 else # ($state == '')
1076                                                 { $buffer = ''; $state = 'both'; }
1077                                         }
1078                                 }
1079                                 $i++;
1080                         }
1081                         # Now close all remaining tags.  Notice that the order is important.
1082                         if ($state == 'strong' || $state == 'emstrong')
1083                                 $output .= "</strong>";
1084                         if ($state == 'em' || $state == 'strongem' || $state == 'emstrong')
1085                                 $output .= "</em>";
1086                         if ($state == 'strongem')
1087                                 $output .= "</strong>";
1088                         if ($state == 'both')
1089                                 $output .= "<strong><em>{$buffer}</em></strong>";
1090                         return $output;
1091                 }
1092         }
1093
1094         # Note: we have to do external links before the internal ones,
1095         # and otherwise take great care in the order of things here, so
1096         # that we don't end up interpreting some URLs twice.
1097
1098         /* private */ function replaceExternalLinks( $text ) {
1099                 $fname = 'Parser::replaceExternalLinks';
1100                 wfProfileIn( $fname );
1101
1102                 $sk =& $this->mOptions->getSkin();
1103                 $linktrail = wfMsg('linktrail');
1104                 $bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1105
1106                 $s = $this->replaceFreeExternalLinks( array_shift( $bits ) );
1107
1108                 $i = 0;
1109                 while ( $i<count( $bits ) ) {
1110                         $url = $bits[$i++];
1111                         $protocol = $bits[$i++];
1112                         $text = $bits[$i++];
1113                         $trail = $bits[$i++];
1114
1115                         # If the link text is an image URL, replace it with an <img> tag
1116                         # This happened by accident in the original parser, but some people used it extensively
1117                         $img = $this->maybeMakeImageLink( $text );
1118                         if ( $img !== false ) {
1119                                 $text = $img;
1120                         }
1121
1122                         $dtrail = '';
1123
1124                         # No link text, e.g. [http://domain.tld/some.link]
1125                         if ( $text == '' ) {
1126                                 # Autonumber if allowed
1127                                 if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
1128                                         $text = "[" . ++$this->mAutonumber . "]";
1129                                 } else {
1130                                         # Otherwise just use the URL
1131                                         $text = htmlspecialchars( $url );
1132                                 }
1133                         } else {
1134                                 # Have link text, e.g. [http://domain.tld/some.link text]s
1135                                 # Check for trail
1136                                 if ( preg_match( $linktrail, $trail, $m2 ) ) {
1137                                         $dtrail = $m2[1];
1138                                         $trail = $m2[2];
1139                                 }
1140                         }
1141
1142                         $encUrl = htmlspecialchars( $url );
1143                         # Bit in parentheses showing the URL for the printable version
1144                         if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $url ) ) {
1145                                 $paren = '';
1146                         } else {
1147                                 # Expand the URL for printable version
1148                                 $paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $encUrl ) . "</i>)</span>";
1149                         }
1150
1151                         # Process the trail (i.e. everything after this link up until start of the next link),
1152                         # replacing any non-bracketed links
1153                         $trail = $this->replaceFreeExternalLinks( $trail );
1154
1155                         $la = $sk->getExternalLinkAttributes( $url, $text );
1156
1157                         # Use the encoded URL
1158                         # This means that users can paste URLs directly into the text
1159                         # Funny characters like &ouml; aren't valid in URLs anyway
1160                         # This was changed in August 2004
1161                         $s .= "<a href=\"{$url}\" {$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
1162                 }
1163
1164                 wfProfileOut( $fname );
1165                 return $s;
1166         }
1167
1168         # Replace anything that looks like a URL with a link
1169         function replaceFreeExternalLinks( $text ) {
1170                 $bits = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1171                 $s = array_shift( $bits );
1172                 $i = 0;
1173
1174                 $sk =& $this->mOptions->getSkin();
1175
1176                 while ( $i < count( $bits ) ){
1177                         $protocol = $bits[$i++];
1178                         $remainder = $bits[$i++];
1179
1180                         if ( preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
1181                                 # Found some characters after the protocol that look promising
1182                                 $url = $protocol . $m[1];
1183                                 $trail = $m[2];
1184
1185                                 # Move trailing punctuation to $trail
1186                                 $sep = ',;\.:!?';
1187                                 # If there is no left bracket, then consider right brackets fair game too
1188                                 if ( strpos( $url, '(' ) === false ) {
1189                                         $sep .= ')';
1190                                 }
1191
1192                                 $numSepChars = strspn( strrev( $url ), $sep );
1193                                 if ( $numSepChars ) {
1194                                         $trail = substr( $url, -$numSepChars ) . $trail;
1195                                         $url = substr( $url, 0, -$numSepChars );
1196                                 }
1197
1198                                 # Replace &amp; from obsolete syntax with &
1199                                 $url = str_replace( '&amp;', '&', $url );
1200
1201                                 # Is this an external image?
1202                                 $text = $this->maybeMakeImageLink( $url );
1203                                 if ( $text === false ) {
1204                                         # Not an image, make a link
1205                                         $text = $sk->makeExternalLink( $url, $url );
1206                                 }
1207                                 $s .= $text . $trail;
1208                         } else {
1209                                 $s .= $protocol . $remainder;
1210                         }
1211                 }
1212                 return $s;
1213         }
1214
1215         function maybeMakeImageLink( $url ) {
1216                 $sk =& $this->mOptions->getSkin();
1217                 $text = false;
1218                 if ( $this->mOptions->getAllowExternalImages() ) {
1219                         if ( preg_match( EXT_IMAGE_REGEX, $url ) ) {
1220                                 # Image found
1221                                 $text = $sk->makeImage( htmlspecialchars( $url ) );
1222                         }
1223                 }
1224                 return $text;
1225         }
1226
1227         /* private */ function replaceInternalLinks( $s ) {
1228                 global $wgLang, $wgLinkCache;
1229                 global $wgNamespacesWithSubpages, $wgLanguageCode;
1230                 static $fname = 'Parser::replaceInternalLinks' ;
1231                 wfProfileIn( $fname );
1232
1233                 wfProfileIn( $fname.'-setup' );
1234                 static $tc = FALSE;
1235                 # the % is needed to support urlencoded titles as well
1236                 if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
1237                 $sk =& $this->mOptions->getSkin();
1238
1239                 $redirect = MagicWord::get ( MAG_REDIRECT ) ;
1240
1241                 $a = explode( '[[', ' ' . $s );
1242                 $s = array_shift( $a );
1243                 $s = substr( $s, 1 );
1244
1245                 # Match a link having the form [[namespace:link|alternate]]trail
1246                 static $e1 = FALSE;
1247                 if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
1248                 # Match the end of a line for a word that's not followed by whitespace,
1249                 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
1250                 static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';
1251
1252                 $useLinkPrefixExtension = $wgLang->linkPrefixExtension();
1253                 # Special and Media are pseudo-namespaces; no pages actually exist in them
1254
1255                 $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );
1256
1257                 if ( $useLinkPrefixExtension ) {
1258                         if ( preg_match( $e2, $s, $m ) ) {
1259                                 $first_prefix = $m[2];
1260                                 $s = $m[1];
1261                         } else {
1262                                 $first_prefix = false;
1263                         }
1264                 } else {
1265                         $prefix = '';
1266                 }
1267
1268                 wfProfileOut( $fname.'-setup' );
1269
1270                 foreach ( $a as $line ) {
1271                         wfProfileIn( $fname.'-prefixhandling' );
1272                         if ( $useLinkPrefixExtension ) {
1273                                 if ( preg_match( $e2, $s, $m ) ) {
1274                                         $prefix = $m[2];
1275                                         $s = $m[1];
1276                                 } else {
1277                                         $prefix='';
1278                                 }
1279                                 # first link
1280                                 if($first_prefix) {
1281                                         $prefix = $first_prefix;
1282                                         $first_prefix = false;
1283                                 }
1284                         }
1285                         wfProfileOut( $fname.'-prefixhandling' );
1286
1287                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
1288                                 $text = $m[2];
1289                                 # fix up urlencoded title texts
1290                                 if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
1291                                 $trail = $m[3];
1292                         } else { # Invalid form; output directly
1293                                 $s .= $prefix . '[[' . $line ;
1294                                 continue;
1295                         }
1296
1297                         /* Valid link forms:
1298                         Foobar -- normal
1299                         :Foobar -- override special treatment of prefix (images, language links)
1300                         /Foobar -- convert to CurrentPage/Foobar
1301                         /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
1302                         */
1303                         $c = substr($m[1],0,1);
1304                         $noforce = ($c != ':');
1305                         if( $c == '/' ) { # subpage
1306                                 if(substr($m[1],-1,1)=='/') {                 # / at end means we don't want the slash to be shown
1307                                         $m[1]=substr($m[1],1,strlen($m[1])-2);
1308                                         $noslash=$m[1];
1309                                 } else {
1310                                         $noslash=substr($m[1],1);
1311                                 }
1312                                 if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
1313                                         $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
1314                                         if( '' == $text ) {
1315                                                 $text= $m[1];
1316                                         } # this might be changed for ugliness reasons
1317                                 } else {
1318                                         $link = $noslash; # no subpage allowed, use standard link
1319                                 }
1320                         } elseif( $noforce ) { # no subpage
1321                                 $link = $m[1];
1322                         } else {
1323                                 $link = substr( $m[1], 1 );
1324                         }
1325                         $wasblank = ( '' == $text );
1326                         if( $wasblank )
1327                         $text = $link;
1328
1329                         $nt = Title::newFromText( $link );
1330                         if( !$nt ) {
1331                                 $s .= $prefix . '[[' . $line;
1332                                 continue;
1333                         }
1334                         $ns = $nt->getNamespace();
1335                         $iw = $nt->getInterWiki();
1336                         if( $noforce ) {
1337                                 if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
1338                                         array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
1339                                         $tmp = $prefix . $trail ;
1340                                         $s .= (trim($tmp) == '')? '': $tmp;
1341                                         continue;
1342                                 }
1343                                 if ( $ns == NS_IMAGE ) {
1344                                         $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
1345                                         $wgLinkCache->addImageLinkObj( $nt );
1346                                         continue;
1347                                 }
1348                                 if ( $ns == NS_CATEGORY ) {
1349                                         $t = $nt->getText() ;
1350                                         $nnt = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).":".$t ) ;
1351
1352                                         $wgLinkCache->suspend(); # Don't save in links/brokenlinks
1353                                         $pPLC=$sk->postParseLinkColour();
1354                                         $sk->postParseLinkColour( false );
1355                                         $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
1356                                         $sk->postParseLinkColour( $pPLC );
1357                                         $wgLinkCache->resume();
1358
1359                                         $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
1360                                         $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
1361                                         $this->mOutput->mCategoryLinks[] = $t ;
1362                                         $s .= $prefix . $trail ;
1363                                         continue;
1364                                 }
1365                         }
1366                         if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
1367                         ( strpos( $link, '#' ) == FALSE ) ) {
1368                                 # Self-links are handled specially; generally de-link and change to bold.
1369                                 $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
1370                                 continue;
1371                         }
1372
1373                         if( $ns == NS_MEDIA ) {
1374                                 $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
1375                                 $wgLinkCache->addImageLinkObj( $nt );
1376                                 continue;
1377                         } elseif( $ns == NS_SPECIAL ) {
1378                                 $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
1379                                 continue;
1380                         }
1381                         $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
1382                 }
1383                 wfProfileOut( $fname );
1384                 return $s;
1385         }
1386
1387         # Some functions here used by doBlockLevels()
1388         #
1389         /* private */ function closeParagraph() {
1390                 $result = '';
1391                 if ( '' != $this->mLastSection ) {
1392                         $result = '</' . $this->mLastSection  . ">\n";
1393                 }
1394                 $this->mInPre = false;
1395                 $this->mLastSection = '';
1396                 return $result;
1397         }
1398         # getCommon() returns the length of the longest common substring
1399         # of both arguments, starting at the beginning of both.
1400         #
1401         /* private */ function getCommon( $st1, $st2 ) {
1402                 $fl = strlen( $st1 );
1403                 $shorter = strlen( $st2 );
1404                 if ( $fl < $shorter ) { $shorter = $fl; }
1405
1406                 for ( $i = 0; $i < $shorter; ++$i ) {
1407                         if ( $st1{$i} != $st2{$i} ) { break; }
1408                 }
1409                 return $i;
1410         }
1411         # These next three functions open, continue, and close the list
1412         # element appropriate to the prefix character passed into them.
1413         #
1414         /* private */ function openList( $char )
1415     {
1416                 $result = $this->closeParagraph();
1417
1418                 if ( '*' == $char ) { $result .= '<ul><li>'; }
1419                 else if ( '#' == $char ) { $result .= '<ol><li>'; }
1420                 else if ( ':' == $char ) { $result .= '<dl><dd>'; }
1421                 else if ( ';' == $char ) {
1422                         $result .= '<dl><dt>';
1423                         $this->mDTopen = true;
1424                 }
1425                 else { $result = '<!-- ERR 1 -->'; }
1426
1427                 return $result;
1428         }
1429
1430         /* private */ function nextItem( $char ) {
1431                 if ( '*' == $char || '#' == $char ) { return '</li><li>'; }
1432                 else if ( ':' == $char || ';' == $char ) {
1433                         $close = "</dd>";
1434                         if ( $this->mDTopen ) { $close = '</dt>'; }
1435                         if ( ';' == $char ) {
1436                                 $this->mDTopen = true;
1437                                 return $close . '<dt>';
1438                         } else {
1439                                 $this->mDTopen = false;
1440                                 return $close . '<dd>';
1441                         }
1442                 }
1443                 return '<!-- ERR 2 -->';
1444         }
1445
1446         /* private */function closeList( $char ) {
1447                 if ( '*' == $char ) { $text = '</li></ul>'; }
1448                 else if ( '#' == $char ) { $text = '</li></ol>'; }
1449                 else if ( ':' == $char ) {
1450                         if ( $this->mDTopen ) {
1451                                 $this->mDTopen = false;
1452                                 $text = '</dt></dl>';
1453                         } else {
1454                                 $text = '</dd></dl>';
1455                         }
1456                 }
1457                 else {  return '<!-- ERR 3 -->'; }
1458                 return $text."\n";
1459         }
1460
1461         /* private */ function doBlockLevels( $text, $linestart ) {
1462                 $fname = 'Parser::doBlockLevels';
1463                 wfProfileIn( $fname );
1464
1465                 # Parsing through the text line by line.  The main thing
1466                 # happening here is handling of block-level elements p, pre,
1467                 # and making lists from lines starting with * # : etc.
1468                 #
1469                 $textLines = explode( "\n", $text );
1470
1471                 $lastPrefix = $output = $lastLine = '';
1472                 $this->mDTopen = $inBlockElem = false;
1473                 $prefixLength = 0;
1474                 $paragraphStack = false;
1475
1476                 if ( !$linestart ) {
1477                         $output .= array_shift( $textLines );
1478                 }
1479                 foreach ( $textLines as $oLine ) {
1480                         $lastPrefixLength = strlen( $lastPrefix );
1481                         $preCloseMatch = preg_match("/<\\/pre/i", $oLine );
1482                         $preOpenMatch = preg_match("/<pre/i", $oLine );
1483                         if ( !$this->mInPre ) {
1484                                 # Multiple prefixes may abut each other for nested lists.
1485                                 $prefixLength = strspn( $oLine, '*#:;' );
1486                                 $pref = substr( $oLine, 0, $prefixLength );
1487
1488                                 # eh?
1489                                 $pref2 = str_replace( ';', ':', $pref );
1490                                 $t = substr( $oLine, $prefixLength );
1491                                 $this->mInPre = !empty($preOpenMatch);
1492                         } else {
1493                                 # Don't interpret any other prefixes in preformatted text
1494                                 $prefixLength = 0;
1495                                 $pref = $pref2 = '';
1496                                 $t = $oLine;
1497                         }
1498
1499                         # List generation
1500                         if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
1501                                 # Same as the last item, so no need to deal with nesting or opening stuff
1502                                 $output .= $this->nextItem( substr( $pref, -1 ) );
1503                                 $paragraphStack = false;
1504
1505                                 if ( ";" == substr( $pref, -1 ) ) {
1506                                         # The one nasty exception: definition lists work like this:
1507                                         # ; title : definition text
1508                                         # So we check for : in the remainder text to split up the
1509                                         # title and definition, without b0rking links.
1510                                         # FIXME: This is not foolproof. Something better in Tokenizer might help.
1511                                         if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1512                                                 $term = $match[1];
1513                                                 $output .= $term . $this->nextItem( ':' );
1514                                                 $t = $match[2];
1515                                         }
1516                                 }
1517                         } elseif( $prefixLength || $lastPrefixLength ) {
1518                                 # Either open or close a level...
1519                                 $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
1520                                 $paragraphStack = false;
1521
1522                                 while( $commonPrefixLength < $lastPrefixLength ) {
1523                                         $output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
1524                                         --$lastPrefixLength;
1525                                 }
1526                                 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
1527                                         $output .= $this->nextItem( $pref{$commonPrefixLength-1} );
1528                                 }
1529                                 while ( $prefixLength > $commonPrefixLength ) {
1530                                         $char = substr( $pref, $commonPrefixLength, 1 );
1531                                         $output .= $this->openList( $char );
1532
1533                                         if ( ';' == $char ) {
1534                                                 # FIXME: This is dupe of code above
1535                                                 if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
1536                                                         $term = $match[1];
1537                                                         $output .= $term . $this->nextItem( ":" );
1538                                                         $t = $match[2];
1539                                                 }
1540                                         }
1541                                         ++$commonPrefixLength;
1542                                 }
1543                                 $lastPrefix = $pref2;
1544                         }
1545                         if( 0 == $prefixLength ) {
1546                                 # No prefix (not in list)--go to paragraph mode
1547                                 $uniq_prefix = UNIQ_PREFIX;
1548                                 // XXX: use a stack for nestable elements like span, table and div
1549                                 $openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
1550                                 $closematch = preg_match(
1551                                         '/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
1552                                         '<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
1553                                 if ( $openmatch or $closematch ) {
1554                                         $paragraphStack = false;
1555                                         $output .= $this->closeParagraph();
1556                                         if($preOpenMatch and !$preCloseMatch) {
1557                                                 $this->mInPre = true;
1558                                         }
1559                                         if ( $closematch  ) {
1560                                                 $inBlockElem = false;
1561                                         } else {
1562                                                 $inBlockElem = true;
1563                                         }
1564                                 } else if ( !$inBlockElem && !$this->mInPre ) {
1565                                         if ( " " == $t{0} and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
1566                                                 // pre
1567                                                 if ($this->mLastSection != 'pre') {
1568                                                         $paragraphStack = false;
1569                                                         $output .= $this->closeParagraph().'<pre>';
1570                                                         $this->mLastSection = 'pre';
1571                                                 }
1572                                         } else {
1573                                                 // paragraph
1574                                                 if ( '' == trim($t) ) {
1575                                                         if ( $paragraphStack ) {
1576                                                                 $output .= $paragraphStack.'<br />';
1577                                                                 $paragraphStack = false;
1578                                                                 $this->mLastSection = 'p';
1579                                                         } else {
1580                                                                 if ($this->mLastSection != 'p' ) {
1581                                                                         $output .= $this->closeParagraph();
1582                                                                         $this->mLastSection = '';
1583                                                                         $paragraphStack = '<p>';
1584                                                                 } else {
1585                                                                         $paragraphStack = '</p><p>';
1586                                                                 }
1587                                                         }
1588                                                 } else {
1589                                                         if ( $paragraphStack ) {
1590                                                                 $output .= $paragraphStack;
1591                                                                 $paragraphStack = false;
1592                                                                 $this->mLastSection = 'p';
1593                                                         } else if ($this->mLastSection != 'p') {
1594                                                                 $output .= $this->closeParagraph().'<p>';
1595                                                                 $this->mLastSection = 'p';
1596                                                         }
1597                                                 }
1598                                         }
1599                                 }
1600                         }
1601                         if ($paragraphStack === false) {
1602                                 $output .= $t."\n";
1603                         }
1604                 }
1605                 while ( $prefixLength ) {
1606                         $output .= $this->closeList( $pref2{$prefixLength-1} );
1607                         --$prefixLength;
1608                 }
1609                 if ( '' != $this->mLastSection ) {
1610                         $output .= '</' . $this->mLastSection . '>';
1611                         $this->mLastSection = '';
1612                 }
1613
1614                 wfProfileOut( $fname );
1615                 return $output;
1616         }
1617
1618         # Return value of a magic variable (like PAGENAME)
1619         function getVariableValue( $index ) {
1620                 global $wgLang, $wgSitename, $wgServer;
1621
1622                 switch ( $index ) {
1623                         case MAG_CURRENTMONTH:
1624                                 return $wgLang->formatNum( date( 'm' ) );
1625                         case MAG_CURRENTMONTHNAME:
1626                                 return $wgLang->getMonthName( date('n') );
1627                         case MAG_CURRENTMONTHNAMEGEN:
1628                                 return $wgLang->getMonthNameGen( date('n') );
1629                         case MAG_CURRENTDAY:
1630                                 return $wgLang->formatNum( date('j') );
1631                         case MAG_PAGENAME:
1632                                 return $this->mTitle->getText();
1633                         case MAG_PAGENAMEE:
1634                                 return $this->mTitle->getPartialURL();
1635                         case MAG_NAMESPACE:
1636                                 # return Namespace::getCanonicalName($this->mTitle->getNamespace());
1637                                 return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch  by Dori
1638                         case MAG_CURRENTDAYNAME:
1639                                 return $wgLang->getWeekdayName( date('w')+1 );
1640                         case MAG_CURRENTYEAR:
1641                                 return $wgLang->formatNum( date( 'Y' ) );
1642                         case MAG_CURRENTTIME:
1643                                 return $wgLang->time( wfTimestampNow(), false );
1644                         case MAG_NUMBEROFARTICLES:
1645                                 return $wgLang->formatNum( wfNumberOfArticles() );
1646                         case MAG_SITENAME:
1647                                 return $wgSitename;
1648                         case MAG_SERVER:
1649                                 return $wgServer;
1650                         default:
1651                                 return NULL;
1652                 }
1653         }
1654
1655         # initialise the magic variables (like CURRENTMONTHNAME)
1656         function initialiseVariables() {
1657                 global $wgVariableIDs;
1658                 $this->mVariables = array();
1659                 foreach ( $wgVariableIDs as $id ) {
1660                         $mw =& MagicWord::get( $id );
1661                         $mw->addToArray( $this->mVariables, $this->getVariableValue( $id ) );
1662                 }
1663         }
1664
1665         /* private */ function replaceVariables( $text, $args = array() ) {
1666                 global $wgLang, $wgScript, $wgArticlePath;
1667
1668                 # Prevent too big inclusions
1669                 if(strlen($text)> MAX_INCLUDE_SIZE)
1670                    return $text;
1671
1672                 $fname = 'Parser::replaceVariables';
1673                 wfProfileIn( $fname );
1674
1675                 $bail = false;
1676                 $titleChars = Title::legalChars();
1677                 $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );
1678
1679                 # This function is called recursively. To keep track of arguments we need a stack:
1680                 array_push( $this->mArgStack, $args );
1681
1682                 # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
1683                 $GLOBALS['wgCurParser'] =& $this;
1684
1685
1686                 if ( $this->mOutputType == OT_HTML ) {
1687                         # Variable substitution
1688                         $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text );
1689
1690                         # Argument substitution
1691                         $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text );
1692                 }
1693                 # Template substitution
1694                 $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
1695                 $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text );
1696
1697                 array_pop( $this->mArgStack );
1698
1699                 wfProfileOut( $fname );
1700                 return $text;
1701         }
1702
1703         function variableSubstitution( $matches ) {
1704                 if ( !$this->mVariables ) {
1705                         $this->initialiseVariables();
1706                 }
1707                 if ( array_key_exists( $matches[1], $this->mVariables ) ) {
1708                         $text = $this->mVariables[$matches[1]];
1709                         $this->mOutput->mContainsOldMagic = true;
1710                 } else {
1711                         $text = $matches[0];
1712                 }
1713                 return $text;
1714         }
1715
1716         # Split template arguments
1717         function getTemplateArgs( $argsString ) {
1718                 if ( $argsString === '' ) {
1719                         return array();
1720                 }
1721
1722                 $args = explode( '|', substr( $argsString, 1 ) );
1723
1724                 # If any of the arguments contains a '[[' but no ']]', it needs to be
1725                 # merged with the next arg because the '|' character between belongs
1726                 # to the link syntax and not the template parameter syntax.
1727                 $argc = count($args);
1728                 $i = 0;
1729                 for ( $i = 0; $i < $argc-1; $i++ ) {
1730                         if ( substr_count ( $args[$i], "[[" ) != substr_count ( $args[$i], "]]" ) ) {
1731                                 $args[$i] .= "|".$args[$i+1];
1732                                 array_splice($args, $i+1, 1);
1733                                 $i--;
1734                                 $argc--;
1735                         }
1736                 }
1737
1738                 return $args;
1739         }
1740
1741         function braceSubstitution( $matches ) {
1742                 global $wgLinkCache, $wgLang;
1743                 $fname = 'Parser::braceSubstitution';
1744                 $found = false;
1745                 $nowiki = false;
1746                 $noparse = false;
1747
1748                 $title = NULL;
1749
1750                 # $newline is an optional newline character before the braces
1751                 # $part1 is the bit before the first |, and must contain only title characters
1752                 # $args is a list of arguments, starting from index 0, not including $part1
1753
1754                 $newline = $matches[1];
1755                 $part1 = $matches[2];
1756                 # If the third subpattern matched anything, it will start with |
1757
1758                 $args = $this->getTemplateArgs($matches[3]);
1759                 $argc = count( $args );
1760
1761                 # {{{}}}
1762                 if ( strpos( $matches[0], '{{{' ) !== false ) {
1763                         $text = $matches[0];
1764                         $found = true;
1765                         $noparse = true;
1766                 }
1767
1768                 # SUBST
1769                 if ( !$found ) {
1770                         $mwSubst =& MagicWord::get( MAG_SUBST );
1771                         if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
1772                                 if ( $this->mOutputType != OT_WIKI ) {
1773                                         # Invalid SUBST not replaced at PST time
1774                                         # Return without further processing
1775                                         $text = $matches[0];
1776                                         $found = true;
1777                                         $noparse= true;
1778                                 }
1779                         } elseif ( $this->mOutputType == OT_WIKI ) {
1780                                 # SUBST not found in PST pass, do nothing
1781                                 $text = $matches[0];
1782                                 $found = true;
1783                         }
1784                 }
1785
1786                 # MSG, MSGNW and INT
1787                 if ( !$found ) {
1788                         # Check for MSGNW:
1789                         $mwMsgnw =& MagicWord::get( MAG_MSGNW );
1790                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
1791                                 $nowiki = true;
1792                         } else {
1793                                 # Remove obsolete MSG:
1794                                 $mwMsg =& MagicWord::get( MAG_MSG );
1795                                 $mwMsg->matchStartAndRemove( $part1 );
1796                         }
1797
1798                         # Check if it is an internal message
1799                         $mwInt =& MagicWord::get( MAG_INT );
1800                         if ( $mwInt->matchStartAndRemove( $part1 ) ) {
1801                                 if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
1802                                         $text = wfMsgReal( $part1, $args, true );
1803                                         $found = true;
1804                                 }
1805                         }
1806                 }
1807
1808                 # NS
1809                 if ( !$found ) {
1810                         # Check for NS: (namespace expansion)
1811                         $mwNs = MagicWord::get( MAG_NS );
1812                         if ( $mwNs->matchStartAndRemove( $part1 ) ) {
1813                                 if ( intval( $part1 ) ) {
1814                                         $text = $wgLang->getNsText( intval( $part1 ) );
1815                                         $found = true;
1816                                 } else {
1817                                         $index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
1818                                         if ( !is_null( $index ) ) {
1819                                                 $text = $wgLang->getNsText( $index );
1820                                                 $found = true;
1821                                         }
1822                                 }
1823                         }
1824                 }
1825
1826                 # LOCALURL and LOCALURLE
1827                 if ( !$found ) {
1828                         $mwLocal = MagicWord::get( MAG_LOCALURL );
1829                         $mwLocalE = MagicWord::get( MAG_LOCALURLE );
1830
1831                         if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
1832                                 $func = 'getLocalURL';
1833                         } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
1834                                 $func = 'escapeLocalURL';
1835                         } else {
1836                                 $func = '';
1837                         }
1838
1839                         if ( $func !== '' ) {
1840                                 $title = Title::newFromText( $part1 );
1841                                 if ( !is_null( $title ) ) {
1842                                         if ( $argc > 0 ) {
1843                                                 $text = $title->$func( $args[0] );
1844                                         } else {
1845                                                 $text = $title->$func();
1846                                         }
1847                                         $found = true;
1848                                 }
1849                         }
1850                 }
1851
1852                 # Internal variables
1853                 if ( !$this->mVariables ) {
1854                         $this->initialiseVariables();
1855                 }
1856                 if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
1857                         $text = $this->mVariables[$part1];
1858                         $found = true;
1859                         $this->mOutput->mContainsOldMagic = true;
1860                 }
1861
1862                 # Template table test
1863
1864                 # Did we encounter this template already? If yes, it is in the cache
1865                 # and we need to check for loops.
1866                 if ( isset( $this->mTemplates[$part1] ) ) {
1867                         # Infinite loop test
1868                         if ( isset( $this->mTemplatePath[$part1] ) ) {
1869                                 $noparse = true;
1870                                 $found = true;
1871                         }
1872                         # set $text to cached message.
1873                         $text = $this->mTemplates[$part1];
1874                         $found = true;
1875                 }
1876
1877                 # Load from database
1878                 if ( !$found ) {
1879                         $title = Title::newFromText( $part1, NS_TEMPLATE );
1880                         if ( !is_null( $title ) && !$title->isExternal() ) {
1881                                 # Check for excessive inclusion
1882                                 $dbk = $title->getPrefixedDBkey();
1883                                 if ( $this->incrementIncludeCount( $dbk ) ) {
1884                                         # This should never be reached.
1885                                         $article = new Article( $title );
1886                                         $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
1887                                         if ( $articleContent !== false ) {
1888                                                 $found = true;
1889                                                 $text = $articleContent;
1890
1891                                         }
1892                                 }
1893
1894                                 # If the title is valid but undisplayable, make a link to it
1895                                 if ( $this->mOutputType == OT_HTML && !$found ) {
1896                                         $text = '[[' . $title->getPrefixedText() . ']]';
1897                                         $found = true;
1898                                 }
1899
1900                                 # Template cache array insertion
1901                                 $this->mTemplates[$part1] = $text;
1902                         }
1903                 }
1904
1905                 # Recursive parsing, escaping and link table handling
1906                 # Only for HTML output
1907                 if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
1908                         $text = wfEscapeWikiText( $text );
1909                 } elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
1910                         # Clean up argument array
1911                         $assocArgs = array();
1912                         $index = 1;
1913                         foreach( $args as $arg ) {
1914                                 $eqpos = strpos( $arg, '=' );
1915                                 if ( $eqpos === false ) {
1916                                         $assocArgs[$index++] = $arg;
1917                                 } else {
1918                                         $name = trim( substr( $arg, 0, $eqpos ) );
1919                                         $value = trim( substr( $arg, $eqpos+1 ) );
1920                                         if ( $value === false ) {
1921                                                 $value = '';
1922                                         }
1923                                         if ( $name !== false ) {
1924                                                 $assocArgs[$name] = $value;
1925                                         }
1926                                 }
1927                         }
1928
1929                         # Do not enter included links in link table
1930                         if ( !is_null( $title ) ) {
1931                                 $wgLinkCache->suspend();
1932                         }
1933
1934                         # Add a new element to the templace recursion path
1935                         $this->mTemplatePath[$part1] = 1;
1936
1937                         $text = $this->stripParse( $text, $newline, $assocArgs );
1938
1939                         # Resume the link cache and register the inclusion as a link
1940                         if ( !is_null( $title ) ) {
1941                                 $wgLinkCache->resume();
1942                                 $wgLinkCache->addLinkObj( $title );
1943                         }
1944                 }
1945                 # Empties the template path
1946                 $this->mTemplatePath = array();
1947
1948                 if ( !$found ) {
1949                         return $matches[0];
1950                 } else {
1951                         return $text;
1952                 }
1953         }
1954
1955         # Triple brace replacement -- used for template arguments
1956         function argSubstitution( $matches ) {
1957                 $newline = $matches[1];
1958                 $arg = trim( $matches[2] );
1959                 $text = $matches[0];
1960                 $inputArgs = end( $this->mArgStack );
1961
1962                 if ( array_key_exists( $arg, $inputArgs ) ) {
1963                         $text = $this->stripParse( $inputArgs[$arg], $newline, array() );
1964                 }
1965
1966                 return $text;
1967         }
1968
1969         # Returns true if the function is allowed to include this entity
1970         function incrementIncludeCount( $dbk ) {
1971                 if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
1972                         $this->mIncludeCount[$dbk] = 0;
1973                 }
1974                 if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) {
1975                         return true;
1976                 } else {
1977                         return false;
1978                 }
1979         }
1980
1981
1982         # Cleans up HTML, removes dangerous tags and attributes
1983         /* private */ function removeHTMLtags( $text ) {
1984                 global $wgUseTidy, $wgUserHtml;
1985                 $fname = 'Parser::removeHTMLtags';
1986                 wfProfileIn( $fname );
1987
1988                 if( $wgUserHtml ) {
1989                         $htmlpairs = array( # Tags that must be closed
1990                                 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
1991                                 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
1992                                 'strike', 'strong', 'tt', 'var', 'div', 'center',
1993                                 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
1994                                 'ruby', 'rt' , 'rb' , 'rp', 'p'
1995                         );
1996                         $htmlsingle = array(
1997                                 'br', 'hr', 'li', 'dt', 'dd'
1998                         );
1999                         $htmlnest = array( # Tags that can be nested--??
2000                                 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
2001                                 'dl', 'font', 'big', 'small', 'sub', 'sup'
2002                         );
2003                         $tabletags = array( # Can only appear inside table
2004                                 'td', 'th', 'tr'
2005                         );
2006                 } else {
2007                         $htmlpairs = array();
2008                         $htmlsingle = array();
2009                         $htmlnest = array();
2010                         $tabletags = array();
2011                 }
2012
2013                 $htmlsingle = array_merge( $tabletags, $htmlsingle );
2014                 $htmlelements = array_merge( $htmlsingle, $htmlpairs );
2015
2016                 $htmlattrs = $this->getHTMLattrs () ;
2017
2018                 # Remove HTML comments
2019                 $text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );
2020
2021                 $bits = explode( '<', $text );
2022                 $text = array_shift( $bits );
2023                 if(!$wgUseTidy) {
2024                         $tagstack = array(); $tablestack = array();
2025                         foreach ( $bits as $x ) {
2026                                 $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
2027                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2028                                 $x, $regs );
2029                                 list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2030                                 error_reporting( $prev );
2031
2032                                 $badtag = 0 ;
2033                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2034                                         # Check our stack
2035                                         if ( $slash ) {
2036                                                 # Closing a tag...
2037                                                 if ( ! in_array( $t, $htmlsingle ) &&
2038                                                 ( $ot = @array_pop( $tagstack ) ) != $t ) {
2039                                                         @array_push( $tagstack, $ot );
2040                                                         $badtag = 1;
2041                                                 } else {
2042                                                         if ( $t == 'table' ) {
2043                                                                 $tagstack = array_pop( $tablestack );
2044                                                         }
2045                                                         $newparams = '';
2046                                                 }
2047                                         } else {
2048                                                 # Keep track for later
2049                                                 if ( in_array( $t, $tabletags ) &&
2050                                                 ! in_array( 'table', $tagstack ) ) {
2051                                                         $badtag = 1;
2052                                                 } else if ( in_array( $t, $tagstack ) &&
2053                                                 ! in_array ( $t , $htmlnest ) ) {
2054                                                         $badtag = 1 ;
2055                                                 } else if ( ! in_array( $t, $htmlsingle ) ) {
2056                                                         if ( $t == 'table' ) {
2057                                                                 array_push( $tablestack, $tagstack );
2058                                                                 $tagstack = array();
2059                                                         }
2060                                                         array_push( $tagstack, $t );
2061                                                 }
2062                                                 # Strip non-approved attributes from the tag
2063                                                 $newparams = $this->fixTagAttributes($params);
2064
2065                                         }
2066                                         if ( ! $badtag ) {
2067                                                 $rest = str_replace( '>', '&gt;', $rest );
2068                                                 $text .= "<$slash$t $newparams$brace$rest";
2069                                                 continue;
2070                                         }
2071                                 }
2072                                 $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2073                         }
2074                         # Close off any remaining tags
2075                         while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
2076                                 $text .= "</$t>\n";
2077                                 if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
2078                         }
2079                 } else {
2080                         # this might be possible using tidy itself
2081                         foreach ( $bits as $x ) {
2082                                 preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
2083                                 $x, $regs );
2084                                 @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
2085                                 if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
2086                                         $newparams = $this->fixTagAttributes($params);
2087                                         $rest = str_replace( '>', '&gt;', $rest );
2088                                         $text .= "<$slash$t $newparams$brace$rest";
2089                                 } else {
2090                                         $text .= '&lt;' . str_replace( '>', '&gt;', $x);
2091                                 }
2092                         }
2093                 }
2094                 wfProfileOut( $fname );
2095                 return $text;
2096         }
2097
2098
2099 /*
2100  *
2101  * This function accomplishes several tasks:
2102  * 1) Auto-number headings if that option is enabled
2103  * 2) Add an [edit] link to sections for logged in users who have enabled the option
2104  * 3) Add a Table of contents on the top for users who have enabled the option
2105  * 4) Auto-anchor headings
2106  *
2107  * It loops through all headlines, collects the necessary data, then splits up the
2108  * string and re-inserts the newly formatted headlines.
2109  *
2110  */
2111
2112         /* private */ function formatHeadings( $text, $isMain=true ) {
2113                 global $wgInputEncoding, $wgMaxTocLevel;
2114
2115                 $doNumberHeadings = $this->mOptions->getNumberHeadings();
2116                 $doShowToc = $this->mOptions->getShowToc();
2117                 $forceTocHere = false;
2118                 if( !$this->mTitle->userCanEdit() ) {
2119                         $showEditLink = 0;
2120                         $rightClickHack = 0;
2121                 } else {
2122                         $showEditLink = $this->mOptions->getEditSection();
2123                         $rightClickHack = $this->mOptions->getEditSectionOnRightClick();
2124                 }
2125
2126                 # Inhibit editsection links if requested in the page
2127                 $esw =& MagicWord::get( MAG_NOEDITSECTION );
2128                 if( $esw->matchAndRemove( $text ) ) {
2129                         $showEditLink = 0;
2130                 }
2131                 # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
2132                 # do not add TOC
2133                 $mw =& MagicWord::get( MAG_NOTOC );
2134                 if( $mw->matchAndRemove( $text ) ) {
2135                         $doShowToc = 0;
2136                 }
2137
2138                 # never add the TOC to the Main Page. This is an entry page that should not
2139                 # be more than 1-2 screens large anyway
2140                 if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
2141                         $doShowToc = 0;
2142                 }
2143
2144                 # Get all headlines for numbering them and adding funky stuff like [edit]
2145                 # links - this is for later, but we need the number of headlines right now
2146                 $numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );
2147
2148                 # if there are fewer than 4 headlines in the article, do not show TOC
2149                 if( $numMatches < 4 ) {
2150                         $doShowToc = 0;
2151                 }
2152
2153                 # if the string __TOC__ (not case-sensitive) occurs in the HTML,
2154                 # override above conditions and always show TOC at that place
2155                 $mw =& MagicWord::get( MAG_TOC );
2156                 if ($mw->match( $text ) ) {
2157                         $doShowToc = 1;
2158                         $forceTocHere = true;
2159                 } else {
2160                         # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
2161                         # override above conditions and always show TOC above first header
2162                         $mw =& MagicWord::get( MAG_FORCETOC );
2163                         if ($mw->matchAndRemove( $text ) ) {
2164                                 $doShowToc = 1;
2165                         }
2166                 }
2167
2168
2169
2170                 # We need this to perform operations on the HTML
2171                 $sk =& $this->mOptions->getSkin();
2172
2173                 # headline counter
2174                 $headlineCount = 0;
2175
2176                 # Ugh .. the TOC should have neat indentation levels which can be
2177                 # passed to the skin functions. These are determined here
2178                 $toclevel = 0;
2179                 $toc = '';
2180                 $full = '';
2181                 $head = array();
2182                 $sublevelCount = array();
2183                 $level = 0;
2184                 $prevlevel = 0;
2185                 foreach( $matches[3] as $headline ) {
2186                         $numbering = '';
2187                         if( $level ) {
2188                                 $prevlevel = $level;
2189                         }
2190                         $level = $matches[1][$headlineCount];
2191                         if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
2192                                 # reset when we enter a new level
2193                                 $sublevelCount[$level] = 0;
2194                                 $toc .= $sk->tocIndent( $level - $prevlevel );
2195                                 $toclevel += $level - $prevlevel;
2196                         }
2197                         if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
2198                                 # reset when we step back a level
2199                                 $sublevelCount[$level+1]=0;
2200                                 $toc .= $sk->tocUnindent( $prevlevel - $level );
2201                                 $toclevel -= $prevlevel - $level;
2202                         }
2203                         # count number of headlines for each level
2204                         @$sublevelCount[$level]++;
2205                         if( $doNumberHeadings || $doShowToc ) {
2206                                 $dot = 0;
2207                                 for( $i = 1; $i <= $level; $i++ ) {
2208                                         if( !empty( $sublevelCount[$i] ) ) {
2209                                                 if( $dot ) {
2210                                                         $numbering .= '.';
2211                                                 }
2212                                                 $numbering .= $sublevelCount[$i];
2213                                                 $dot = 1;
2214                                         }
2215                                 }
2216                         }
2217
2218                         # The canonized header is a version of the header text safe to use for links
2219                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
2220                         $canonized_headline = $this->unstrip( $headline, $this->mStripState );
2221                         $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState );
2222
2223                         # strip out HTML
2224                         $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
2225                         $tocline = trim( $canonized_headline );
2226                         $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
2227                         $replacearray = array(
2228                                 '%3A' => ':',
2229                                 '%' => '.'
2230                         );
2231                         $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);
2232                         $refer[$headlineCount] = $canonized_headline;
2233
2234                         # count how many in assoc. array so we can track dupes in anchors
2235                         @$refers[$canonized_headline]++;
2236                         $refcount[$headlineCount]=$refers[$canonized_headline];
2237
2238                         # Prepend the number to the heading text
2239
2240                         if( $doNumberHeadings || $doShowToc ) {
2241                                 $tocline = $numbering . ' ' . $tocline;
2242
2243                                 # Don't number the heading if it is the only one (looks silly)
2244                                 if( $doNumberHeadings && count( $matches[3] ) > 1) {
2245                                         # the two are different if the line contains a link
2246                                         $headline=$numbering . ' ' . $headline;
2247                                 }
2248                         }
2249
2250                         # Create the anchor for linking from the TOC to the section
2251                         $anchor = $canonized_headline;
2252                         if($refcount[$headlineCount] > 1 ) {
2253                                 $anchor .= '_' . $refcount[$headlineCount];
2254                         }
2255                         if( $doShowToc && ( !isset($wgMaxTocLevel) || $toclevel<$wgMaxTocLevel ) ) {
2256                                 $toc .= $sk->tocLine($anchor,$tocline,$toclevel);
2257                         }
2258                         if( $showEditLink ) {
2259                                 if ( empty( $head[$headlineCount] ) ) {
2260                                         $head[$headlineCount] = '';
2261                                 }
2262                                 $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
2263                         }
2264
2265                         # Add the edit section span
2266                         if( $rightClickHack ) {
2267                                 $headline = $sk->editSectionScript($headlineCount+1,$headline);
2268                         }
2269
2270                         # give headline the correct <h#> tag
2271                         @$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";
2272
2273                         $headlineCount++;
2274                 }
2275
2276                 if( $doShowToc ) {
2277                         $toclines = $headlineCount;
2278                         $toc .= $sk->tocUnindent( $toclevel );
2279                         $toc = $sk->tocTable( $toc );
2280                 }
2281
2282                 # split up and insert constructed headlines
2283
2284                 $blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
2285                 $i = 0;
2286
2287                 foreach( $blocks as $block ) {
2288                         if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
2289                             # This is the [edit] link that appears for the top block of text when
2290                                 # section editing is enabled
2291
2292                                 # Disabled because it broke block formatting
2293                                 # For example, a bullet point in the top line
2294                                 # $full .= $sk->editSectionLink(0);
2295                         }
2296                         $full .= $block;
2297                         if( $doShowToc && !$i && $isMain && !$forceTocHere) {
2298                         # Top anchor now in skin
2299                                 $full = $full.$toc;
2300                         }
2301
2302                         if( !empty( $head[$i] ) ) {
2303                                 $full .= $head[$i];
2304                         }
2305                         $i++;
2306                 }
2307                 if($forceTocHere) {
2308                         $mw =& MagicWord::get( MAG_TOC );
2309                         return $mw->replace( $toc, $full );
2310                 } else {
2311                         return $full;
2312                 }
2313         }
2314
# Turn every "ISBN <number>" run in $text into an HTML link to the
# Special:Booksources page and return the resulting text.
#
# $text - text to scan; returned unchanged when it holds no "ISBN " marker.
#
# The number is the longest run of digits, hyphens and upper-case ASCII
# letters after the marker. Blanks between the marker and the number are
# skipped and do not appear in the link. Hyphens are removed for the query
# string but kept in the link label. A marker that is not followed by a
# usable number is written back as it was found.
/* private */ function magicISBN( $text ) {
        $fname = 'Parser::magicISBN';
        wfProfileIn( $fname );

        # The marker is a plain literal, so explode() does the job; the
        # POSIX-regex split() is not available in current PHP versions.
        $pieces = explode( 'ISBN ', $text );
        if ( count( $pieces ) < 2 ) {
                wfProfileOut( $fname );
                return $text;
        }
        $text = array_shift( $pieces );
        $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

        foreach ( $pieces as $rest ) {
                # strspn() copes with an empty remainder, which indexing the
                # first character of the string does not.
                $blankLen = strspn( $rest, ' ' );
                $blank = str_repeat( ' ', $blankLen );
                $rest = (string)substr( $rest, $blankLen );

                $isbnLen = strspn( $rest, $valid );
                $isbn = (string)substr( $rest, 0, $isbnLen );
                $rest = (string)substr( $rest, $isbnLen );

                $num = str_replace( '-', '', $isbn );

                if ( $num === '' ) {
                        # $isbn can only hold hyphens here; put them back so no
                        # input text is lost.
                        $text .= "ISBN $blank$isbn$rest";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
                        $text .= '<a href="' .
                                $titleObj->escapeLocalUrl( "isbn={$num}" ) .
                                "\" class=\"internal\">ISBN $isbn</a>";
                        $text .= $rest;
                }
        }
        wfProfileOut( $fname );
        return $text;
}
2355
# Turn every "GEO <coordinates>" run in $text into an HTML link to the
# Special:Geo page and return the resulting text.
#
# $text - text to scan. Returned untouched when $wgUseGeoMode is unset or
#         false.
#
# Coordinates are the longest run of digits and the characters ". + - :"
# after the marker. '+' signs are removed for the query string but kept in
# the link label. A run is only linked when it contains at least one ':'.
# Anything else is written back as it was found.
/* private */ function magicGEO( $text ) {
        global $wgUseGeoMode;
        if ( !isset ( $wgUseGeoMode ) || !$wgUseGeoMode ) return $text ;
        $fname = 'Parser::magicGEO';
        wfProfileIn( $fname );

        # This rewriting step is only for the ~35000 U.S. Census Rambot pages...
        # It converts  D&deg;M'S" North|South, D&deg;M'S" East|West  into the
        # "(GEO lat:lon)" form handled below. South and West get a minus sign.
        $names = array( 'N' => 'North', 'S' => 'South', 'E' => 'East', 'W' => 'West' );
        $signs = array( 'N' => '+', 'S' => '-', 'E' => '+', 'W' => '-' );
        $dms = '(\d+)&deg;(\d+)\'(\d+)"';
        foreach ( array( 'N', 'S' ) as $ns ) {
                foreach ( array( 'W', 'E' ) as $ew ) {
                        $pattern = "/{$dms} {$names[$ns]}, {$dms} {$names[$ew]}/";
                        $replacement = "(GEO {$signs[$ns]}\$1.\$2.\$3:{$signs[$ew]}\$4.\$5.\$6)";
                        $text = preg_replace( $pattern, $replacement, $text );
                }
        }

        # The marker is a plain literal, so explode() does the job; the
        # POSIX-regex split() is not available in current PHP versions.
        $pieces = explode( 'GEO ', $text );
        if ( count( $pieces ) < 2 ) {
                wfProfileOut( $fname );
                return $text;
        }
        $text = array_shift( $pieces );
        $valid = '0123456789.+-:';

        foreach ( $pieces as $rest ) {
                # strspn() copes with an empty remainder, which indexing the
                # first character of the string does not.
                $blankLen = strspn( $rest, ' ' );
                $blank = str_repeat( ' ', $blankLen );
                $rest = (string)substr( $rest, $blankLen );

                $geoLen = strspn( $rest, $valid );
                $geo = (string)substr( $rest, 0, $geoLen );
                $rest = (string)substr( $rest, $geoLen );

                $num = str_replace( '+', '', $geo );

                if ( $num === '' || count ( explode ( ":" , $num , 3 ) ) < 2 ) {
                        # Not a usable coordinate pair: give back everything that
                        # was consumed so no input text is lost.
                        $text .= "GEO $blank$geo$rest";
                } else {
                        $titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
                        $text .= '<a href="' .
                                $titleObj->escapeLocalUrl( "coordinates={$num}" ) .
                                "\" class=\"internal\">GEO $geo</a>";
                        $text .= $rest;
                }
        }
        wfProfileOut( $fname );
        return $text;
}
2404
# Turn every "RFC <digits>" run in $text into an external HTML link and
# return the resulting text.
#
# $text - text to scan; returned unchanged when it holds no "RFC " marker.
#
# The link target is the 'rfcurl' message with '$1' replaced by the number.
# Link attributes come from the skin held in $this->mOptions. Blanks between
# the marker and the number are skipped and do not appear in the link. A
# marker that is not followed by a digit is written back as it was found.
/* private */ function magicRFC( $text ) {
        # The marker is a plain literal, so explode() does the job; the
        # POSIX-regex split() is not available in current PHP versions.
        $pieces = explode( 'RFC ', $text );
        if ( count ( $pieces ) < 2 ) return $text;
        $text = array_shift( $pieces );
        $valid = '0123456789';

        foreach ( $pieces as $rest ) {
                # strspn() copes with an empty remainder, which indexing the
                # first character of the string does not.
                $blankLen = strspn( $rest, ' ' );
                $blank = str_repeat( ' ', $blankLen );
                $rest = (string)substr( $rest, $blankLen );

                $rfcLen = strspn( $rest, $valid );
                $rfc = (string)substr( $rest, 0, $rfcLen );
                $rest = (string)substr( $rest, $rfcLen );

                if ( $rfc === '' ) {
                        $text .= "RFC $blank$rest";
                } else {
                        $url = wfmsg( 'rfcurl' );
                        $url = str_replace( '$1', $rfc, $url);
                        $sk =& $this->mOptions->getSkin();
                        $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
                        $text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$rest}";
                }
        }
        return $text;
}
2437
# Entry point for the save-time pass: returns altered wiki markup rather
# than HTML (the output type is set to OT_WIKI).
#
# $text       - wiki markup to transform
# $title      - title object, stored by reference in $this->mTitle
# $user       - user object, handed on to pstPass2()
# $options    - stored in $this->mOptions
# $clearState - when true (the default) clearState() is called first
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
        $this->mOutputType = OT_WIKI;
        $this->mTitle =& $title;
        $this->mOptions = $options;

        if ( $clearState ) {
                $this->clearState();
        }

        # Normalise Windows line endings. No other clean-up of the raw text
        # is done at this point.
        $text = str_replace( "\r\n", "\n", $text );

        # Run the second pass on the stripped text, then undo both kinds of
        # stripping using the same state variable.
        $stripState = false;
        $stripped = $this->strip( $text, $stripState, false );
        $transformed = $this->pstPass2( $stripped, $user );
        $restored = $this->unstrip( $transformed, $stripState );
        $restored = $this->unstripNoWiki( $restored, $stripState );
        return $restored;
}
2466
# Second pass of the pre-save transform. In order, it:
#   - runs replaceVariables() on the text,
#   - expands the signature shortcuts ~~~, ~~~~ and ~~~~~,
#   - completes "context" links such as [[|name]] and [[name (context)|]],
#   - trims trailing whitespace and removes the MAG_END magic word.
#
# $text - wiki markup to transform
# $user - user object; getName() and getOption( 'nickname' ) supply the
#         signature
#
# Returns the transformed wiki markup.
/* private */ function pstPass2( $text, &$user ) {
        global $wgLang, $wgLocaltimezone;

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # Signatures
        #
        $n = $user->getName();
        $k = $user->getOption( 'nickname' );
        if ( '' == $k ) { $k = $n; }
        if(isset($wgLocaltimezone)) {
                $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone);
        }
        /* Note: this is an ugly timezone hack for the European wikis */
        $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
          ' (' . date( 'T' ) . ')';
        # Put back the TZ value that was saved above
        if(isset($wgLocaltimezone)) putenv('TZ='.$oldtz);

        # Plain string replacement, longest marker first. The markers are
        # literals, and the replacement holds user-controlled text that must
        # not be read as regex backreferences ("$1", "\1").
        $userLink = '[[' . $wgLang->getNsText( NS_USER ) . ":$n|$k]]";
        $text = str_replace( '~~~~~', $d, $text );
        $text = str_replace( '~~~~', "$userLink $d", $text );
        $text = str_replace( '~~~', $userLink, $text );

        # Context links: [[|name]] and [[name (context)|]]
        #
        $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
        $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
        $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
        $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

        $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";             # [[page (context)|]]
        $p2 = "/\[\[\\|({$tc}+)]]/";                            # [[|page]]
        $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";          # [[namespace:page|]]
        $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                                                # [[ns:page (cont)|]]

        # The context is the parenthesised suffix of the current title, if any
        $context = "";
        $t = $this->mTitle->getText();
        if ( preg_match( $conpat, $t, $m ) ) {
                $context = $m[2];
        }
        $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
        $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
        $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

        if ( '' == $context ) {
                $text = preg_replace( $p2, '[[\\1]]', $text );
        } else {
                $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
        }

        # Trim trailing whitespace
        # MAG_END (__END__) tag allows for trailing
        # whitespace to be deliberately included
        $text = rtrim( $text );
        $mw =& MagicWord::get( MAG_END );
        $mw->matchAndRemove( $text );

        return $text;
}
2534
# Prepare the parser for use from outside parse(): stores the title (by
# reference), the options and the output type, which parse() would normally
# set up, so that external code can call other members of this class.
#
# $clearState - when true (the default) clearState() is called as well
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
        $this->mOutputType = $outputType;
        $this->mOptions = $options;
        $this->mTitle =& $title;

        if ( !$clearState ) {
                return;
        }
        $this->clearState();
}
2545
# Run replaceVariables() over $text with the output type set to OT_MSG,
# using the global $wgTitle as title and $options as parser options.
#
# A static flag guards against re-entry: a call made while another call is
# still running returns $text unchanged.
function transformMsg( $text, $options ) {
        global $wgTitle;
        static $busy = false;

        if ( !$busy ) {
                $busy = true;

                $this->mTitle = $wgTitle;
                $this->mOptions = $options;
                $this->mOutputType = OT_MSG;
                $this->clearState();
                $text = $this->replaceVariables( $text );

                $busy = false;
        }

        return $text;
}
2565
# Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
# The callback will be called with the text inside the tag and should
# transform and return it.
#
# Returns the callback previously stored for $tag, or null if there was none.
function setHook( $tag, $callback ) {
        if ( isset( $this->mTagHooks[$tag] ) ) {
                $previous = $this->mTagHooks[$tag];
        } else {
                $previous = null;
        }
        $this->mTagHooks[$tag] = $callback;
        return $previous;
}
2574 }
2575
# Value holder for the result of a parse: the output text plus the language
# links, category links and the "contains old magic" flag collected with it.
class ParserOutput
{
        var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
        var $mCacheTime; # Used in ParserCache

        # All arguments are optional; the cache time always starts out empty.
        function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
                $containsOldMagic = false )
        {
                $this->mText = $text;
                $this->mLanguageLinks = $languageLinks;
                $this->mCategoryLinks = $categoryLinks;
                $this->mContainsOldMagic = $containsOldMagic;
                $this->mCacheTime = "";
        }

        # Accessors
        function getText() { return $this->mText; }
        function getLanguageLinks() { return $this->mLanguageLinks; }
        function getCategoryLinks() { return $this->mCategoryLinks; }
        function getCacheTime() { return $this->mCacheTime; }
        function containsOldMagic() { return $this->mContainsOldMagic; }

        # Mutators; each one hands back whatever wfSetVar() returns
        function setText( $text ) { return wfSetVar( $this->mText, $text ); }
        function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
        function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
        function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

        # Fold the link lists and the old-magic flag of another ParserOutput
        # into this one. The text and the cache time are left alone.
        function merge( $other ) {
                $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
                # Categories come from $other's category list, not from the
                # language links that were merged on the line above.
                $this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
                $this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
        }

}
2609
# Bundle of settings that control one parser run. The values are copied from
# site-wide globals and from a user's preferences by initialiseFromUser().
class ParserOptions
{
        # All variables are private
        var $mUseTeX;                    # Use texvc to expand <math> tags
        var $mUseCategoryMagic;          # Treat [[Category:xxxx]] tags specially
        var $mUseDynamicDates;           # Use $wgDateFormatter to format dates
        var $mInterwikiMagic;            # Interlanguage links are removed and returned in an array
        var $mAllowExternalImages;       # Allow external images inline
        var $mSkin;                      # Reference to the preferred skin
        var $mDateFormat;                # Date format index
        var $mEditSection;               # Create "edit section" links
        var $mEditSectionOnRightClick;   # Generate JavaScript to edit section on right click
        var $mNumberHeadings;            # Automatically number headings
        var $mShowToc;                   # Show table of contents

        # --- Site-wide settings: getter followed by its setter ---
        function getUseTeX() { return $this->mUseTeX; }
        function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }

        function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
        function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }

        function getUseDynamicDates() { return $this->mUseDynamicDates; }
        function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }

        function getInterwikiMagic() { return $this->mInterwikiMagic; }
        function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }

        function getAllowExternalImages() { return $this->mAllowExternalImages; }
        function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }

        # --- Per-user settings: getter followed by its setter ---
        function getDateFormat() { return $this->mDateFormat; }
        function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }

        function getEditSection() { return $this->mEditSection; }
        function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }

        function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
        function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }

        function getNumberHeadings() { return $this->mNumberHeadings; }
        function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }

        function getShowToc() { return $this->mShowToc; }
        function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

        # The skin is kept by reference; its setter returns nothing.
        function getSkin() { return $this->mSkin; }
        function setSkin( &$x ) { $this->mSkin =& $x; }

        # Build a ParserOptions object initialised from the given user.
        /* static */ function newFromUser( &$user ) {
                $popts = new ParserOptions;
                $popts->initialiseFromUser( $user );
                return $popts;
        }

        # Fill every option: the first five come from the $wg* globals, the
        # rest from the user's skin and preferences. When $userInput is
        # empty, a fresh User marked as loaded is used instead.
        function initialiseFromUser( &$userInput ) {
                global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

                if ( $userInput ) {
                        $user =& $userInput;
                } else {
                        $user = new User;
                        $user->setLoaded( true );
                }

                # Site configuration
                $this->mUseTeX = $wgUseTeX;
                $this->mUseCategoryMagic = $wgUseCategoryMagic;
                $this->mUseDynamicDates = $wgUseDynamicDates;
                $this->mInterwikiMagic = $wgInterwikiMagic;
                $this->mAllowExternalImages = $wgAllowExternalImages;

                # User skin and preferences
                $this->mSkin =& $user->getSkin();
                $this->mDateFormat = $user->getOption( 'date' );
                $this->mEditSection = $user->getOption( 'editsection' );
                $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
                $this->mNumberHeadings = $user->getOption( 'numberheadings' );
                $this->mShowToc = $user->getOption( 'showtoc' );
        }
}
2681
2682 # Regex callbacks, used in Parser::replaceVariables
# Forwards the regex match array to braceSubstitution() on the parser held
# in the global $wgCurParser and returns its result.
function wfBraceSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->braceSubstitution( $matches );
        return $replacement;
}
2688
# Forwards the regex match array to argSubstitution() on the parser held
# in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->argSubstitution( $matches );
        return $replacement;
}
2694
# Forwards the regex match array to variableSubstitution() on the parser
# held in the global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
        global $wgCurParser;

        $replacement = $wgCurParser->variableSubstitution( $matches );
        return $replacement;
}
2700
# Returns the global $wgNumberOfArticles after giving wfLoadSiteStats()
# the chance to fill it in.
function wfNumberOfArticles()
{
        wfLoadSiteStats();

        return $GLOBALS['wgNumberOfArticles'];
}
2708
# Fill the globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles from
# row 1 of the site_stats table. Nothing is done when $wgNumberOfArticles
# already differs from -1, and the globals stay as they are when the query
# yields false.
/* private */ function wfLoadSiteStats()
{
        global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;

        if ( -1 != $wgNumberOfArticles ) {
                return;
        }

        $fname = 'wfLoadSiteStats';
        $fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
        $conds = array( 'ss_row_id' => 1 );

        $dbr =& wfGetDB( DB_SLAVE );
        $row = $dbr->getArray( 'site_stats', $fields, $conds, $fname );

        if ( $row !== false ) {
                $wgTotalViews = $row->ss_total_views;
                $wgTotalEdits = $row->ss_total_edits;
                $wgNumberOfArticles = $row->ss_good_articles;
        }
}
2729
# Replace double quotes and angle brackets in $in with their HTML entities.
# Ampersands and every other character pass through untouched.
function wfEscapeHTMLTagsOnly( $in ) {
        $entities = array(
                '"' => '&quot;',
                '>' => '&gt;',
                '<' => '&lt;',
        );
        $search = array_keys( $entities );
        $replace = array_values( $entities );

        return str_replace( $search, $replace, $in );
}
2736
2737
2738 ?>