Parser cache moved to memcached
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 if( $GLOBALS['wgUseWikiHiero'] ){
6 require_once('extensions/wikihiero/wikihiero.php');
7 }
8 if( $GLOBALS['wgUseTimeline'] ){
9 require_once('extensions/timeline/Timeline.php');
10 }
11
12 # PHP Parser
13 #
14 # Processes wiki markup
15 #
16 # There are two main entry points into the Parser class: parse() and preSaveTransform().
17 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
18 #
19 # Globals used:
20 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
21 #
22 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
23 #
24 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
25 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
26 # $wgLocaltimezone
27 #
28 # * only within ParserOptions
29 #
30 #
31 #----------------------------------------
32 # Variable substitution O(N^2) attack
33 #-----------------------------------------
34 # Without countermeasures, it would be possible to attack the parser by saving a page
35 # filled with a large number of inclusions of large pages. The size of the generated
36 # page would be proportional to the square of the input size. Hence, we limit the number
37 # of inclusions of any given page, thus bringing any attack back to O(N).
38 #
39
# Maximum number of times any single page may be included while rendering
# one page; see the O(N^2) attack note above.
define( 'MAX_INCLUDE_REPEAT', 5 );

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name for extractTags(): when passed as $tag, HTML comments
# are extracted instead of a regular <XML>-style element. It must not be
# anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the unique markers that temporarily replace stripped content;
# used in two functions at least.
define( 'UNIQ_PREFIX', 'NaodW29' );
55
56 class Parser
57 {
58 # Cleared with clearState():
59 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
60 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
61
62 # Temporary:
63 var $mOptions, $mTitle, $mOutputType;
64
# Constructor (PHP 4 style): a new parser starts out in the same state
# that clearState() establishes.
function Parser() {
    $this->clearState();
}
69
# Resets every member that is documented as "Cleared with clearState()".
# parse() calls this at the start of a run unless told not to.
function clearState()
{
    $this->mOutput = new ParserOutput;
    $this->mAutonumber = 0;
    $this->mLastSection = "";
    $this->mDTopen = false;
    $this->mVariables = false;
    $this->mIncludeCount = array();
    $this->mStripState = array();
    $this->mArgStack = array();
    # $mInPre belongs to the cleared group as well; without this reset an
    # unterminated <pre> in one parse would still be "open" in the next.
    $this->mInPre = false;
}
81
82 # First pass--just handle <nowiki> sections, pass the rest off
83 # to internalParse() which does all the real work.
84 #
85 # Returns a ParserOutput
86 #
# Renders wikitext to HTML.
#
#   $text        wikitext to render
#   $title       title object of the page being rendered (stored by reference)
#   $options     parser options object for this run
#   $linestart   handed to internalParse() and doBlockLevels()
#   $clearState  when true, clearState() is called before anything else
#
# Returns $this->mOutput after its text has been set.
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
    global $wgUseTidy;
    $fname = "Parser::parse";
    wfProfileIn( $fname );

    if ( $clearState ) {
        $this->clearState();
    }

    $this->mOptions = $options;
    $this->mTitle =& $title;
    $this->mOutputType = OT_HTML;

    $text = $this->strip( $text, $this->mStripState );
    $text = $this->internalParse( $text, $linestart );
    $text = $this->unstrip( $text, $this->mStripState );

    # Clean up special characters, only run once, next-to-last before doBlockLevels.
    # These replacements are applied whether or not tidy is in use.
    $fixtags = array(
        # french spaces, last one Guillemet-left
        "/ (\\?|:|!|\\302\\273)/i" => "&nbsp;\\1",
        # french spaces, Guillemet-right
        "/(\\302\\253) /i" => "\\1&nbsp;",
        "/<center *>/i" => '<div class="center">',
        "/<\\/center *>/i" => '</div>'
    );
    if ( !$wgUseTidy ) {
        # Only applied when tidy will not post-process the output.
        $fixtags["/<hr *>/i"] = '<hr/>';
        $fixtags["/<br *>/i"] = '<br/>';
        # Clean up spare ampersands; note that we probably ought to be
        # more careful about named entities. The hex class is A-F/a-f
        # (the former "A-f" range also matched G-Z and punctuation).
        $fixtags['/&(?!amp;|#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/'] = '&amp;';
    }
    $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

    # only once and last
    $text = $this->doBlockLevels( $text, $linestart );
    if ( $wgUseTidy ) {
        $text = $this->tidy( $text );
    }
    $this->mOutput->setText( $text );
    wfProfileOut( $fname );
    return $this->mOutput;
}
141
# Returns a string of lowercase hex digits built from two independent
# mt_rand() draws in the range 0..0x7fffffff.
/* static */ function getRandomString()
{
    $high = mt_rand( 0, 0x7fffffff );
    $low = mt_rand( 0, 0x7fffffff );
    return dechex( $high ) . dechex( $low );
}
146
147 # Replaces all occurrences of <$tag>content</$tag> in the text
148 # with a random marker and returns the new text. the output parameter
149 # $content will be an associative array filled with data on the form
150 # $unique_marker => content.
151
152 # If $content is already set, the additional entries will be appended
153
154 # If $tag is set to STRIP_COMMENTS, the function will extract
155 # <!-- HTML comments -->
156
# Cuts every <$tag>...</$tag> element (or every <!-- ... --> comment when
# $tag is STRIP_COMMENTS) out of $text and puts a unique marker in its place.
#
#   $tag          element name, or STRIP_COMMENTS
#   $text         input text
#   $content      in/out: marker => inner content; existing entries are kept
#   $uniq_prefix  string every marker starts with
#
# Returns the text with markers substituted. An element whose closing tag
# is missing extends to the end of the text.
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
    $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
    if ( !$content ) {
        $content = array( );
    }

    # The delimiters do not change between iterations, so build them once.
    if ( $tag == STRIP_COMMENTS ) {
        $openPattern = "/<!--/i";
        $closePattern = "/-->/i";
    } else {
        # Quote the name so it can never be read as regex syntax.
        $quotedTag = preg_quote( $tag, '/' );
        $openPattern = "/<\\s*$quotedTag\\s*>/i";
        $closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
    }

    $n = 1;
    $stripped = "";

    while ( "" != $text ) {
        $p = preg_split( $openPattern, $text, 2 );
        $stripped .= $p[0];
        if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
            # No further opening tag, or nothing follows it.
            $text = "";
        } else {
            $q = preg_split( $closePattern, $p[1], 2 );
            $marker = $rnd . sprintf("%08X", $n++);
            $content[$marker] = $q[0];
            $stripped .= $marker;
            # Without a closing tag there is no remainder to continue with.
            $text = ( count( $q ) < 2 ) ? "" : $q[1];
        }
    }
    return $stripped;
}
188
189 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
190 # If $render is set, performs necessary rendering operations on plugins
191 # Returns the text, and fills an array with data needed in unstrip()
192 # If the $state is already a valid strip state, it adds to the state
193
194 # When $stripcomments is set, HTML comments <!-- like this -->
195 # will be stripped in addition to other tags. This is important
196 # for section editing, where these comments cause confusion when
197 # counting the sections in the wikisource
# Replaces <nowiki>, <hiero>, <timeline>, <math> and <pre> elements (and
# HTML comments when $stripcomments is set) with unique markers.
#
#   $text           text to process
#   $state          in/out strip state: kind => ( marker => replacement );
#                   new entries are added to whatever is already there
#   $stripcomments  also strip <!-- comments -->
#
# When the output type is OT_HTML the stored replacement is the rendered or
# escaped form; otherwise it is the original element, tags included.
# Returns the text containing the markers.
function strip( $text, &$state, $stripcomments = false )
{
    $render = ($this->mOutputType == OT_HTML);
    $nowiki_content = array();
    $hiero_content = array();
    $timeline_content = array();
    $math_content = array();
    $pre_content = array();
    $comment_content = array();

    # Replace any instances of the placeholders
    $uniq_prefix = UNIQ_PREFIX;
    #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

    $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
    foreach( $nowiki_content as $marker => $content ){
        if( $render ){
            $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
        } else {
            $nowiki_content[$marker] = "<nowiki>$content</nowiki>";
        }
    }

    $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
    foreach( $hiero_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseWikiHiero']){
            $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
        } else {
            $hiero_content[$marker] = "<hiero>$content</hiero>";
        }
    }

    $text = Parser::extractTags("timeline", $text, $timeline_content, $uniq_prefix);
    foreach( $timeline_content as $marker => $content ){
        if( $render && $GLOBALS['wgUseTimeline']){
            $timeline_content[$marker] = renderTimeline( $content );
        } else {
            $timeline_content[$marker] = "<timeline>$content</timeline>";
        }
    }

    $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
    foreach( $math_content as $marker => $content ){
        if( $render ) {
            if( $this->mOptions->getUseTeX() ) {
                $math_content[$marker] = renderMath( $content );
            } else {
                # TeX disabled: show the element literally, with a proper
                # closing tag.
                $math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
            }
        } else {
            $math_content[$marker] = "<math>$content</math>";
        }
    }

    $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix);
    foreach( $pre_content as $marker => $content ){
        if( $render ){
            $pre_content[$marker] = "<pre>" . wfEscapeHTMLTagsOnly( $content ) . "</pre>";
        } else {
            $pre_content[$marker] = "<pre>$content</pre>";
        }
    }

    if($stripcomments) {
        $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
        foreach( $comment_content as $marker => $content ){
            $comment_content[$marker] = "<!--$content-->";
        }
    }

    # Merge with the pre-existing state, if there is one. A state seeded by
    # insertStripItem() may lack some of these keys, so each key is merged
    # individually; keys this function does not own (e.g. 'item') are left
    # untouched and keep their position.
    $found = array(
        'nowiki' => $nowiki_content,
        'hiero' => $hiero_content,
        'timeline' => $timeline_content,
        'math' => $math_content,
        'pre' => $pre_content,
        'comment' => $comment_content
    );
    if ( !$state ) {
        $state = array();
    }
    foreach ( $found as $kind => $items ) {
        if ( isset( $state[$kind] ) && is_array( $state[$kind] ) ) {
            $state[$kind] = $state[$kind] + $items;
        } else {
            $state[$kind] = $items;
        }
    }
    return $text;
}
287
# Puts the stored replacements back in place of their markers.
#
#   $text   text containing markers
#   $state  strip state: kind => ( marker => replacement )
#
# Returns the text with every known marker replaced. A state that is not
# an array (nothing stripped yet) leaves the text unchanged.
function unstrip( $text, &$state )
{
    if ( !is_array( $state ) ) {
        return $text;
    }

    # Must expand in reverse order, otherwise nested tags will be corrupted.
    # Iterating over reversed copies visits every entry, whatever its value,
    # and does not depend on the arrays' internal pointers.
    foreach ( array_reverse( $state, true ) as $contentDict ) {
        if ( !is_array( $contentDict ) ) {
            continue;
        }
        foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
            $text = str_replace( $marker, $content, $text );
        }
    }

    return $text;
}
300
301 # Add an item to the strip state
302 # Returns the unique tag which must be inserted into the stripped text
303 # The tag will be replaced with the original text in unstrip()
304
# Stores $text in the strip state under a freshly generated marker.
#
#   $text   content to store
#   $state  in/out strip state; created here when empty
#
# Returns the marker that must be inserted into the stripped text.
function insertStripItem( $text, &$state )
{
    $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
    if ( !$state ) {
        # Seed every kind that strip() merges into, so that a later strip()
        # on this state finds all of its keys. 'item' stays last: unstrip()
        # expands kinds in reverse order.
        $state = array(
            'nowiki' => array(),
            'hiero' => array(),
            'timeline' => array(),
            'math' => array(),
            'pre' => array(),
            'comment' => array(),
            'item' => array()
        );
    }
    $state['item'][$rnd] = $text;
    return $rnd;
}
319
320 # This method generates the list of subcategories and pages for a category
# Builds the HTML listing of subcategories and member pages that is
# appended to a category page.
#
# Returns "" when category magic is disabled in the parser options or the
# current title is not in the category namespace; otherwise a clearing
# <br> followed by the non-empty listings.
function categoryMagic ()
{
    global $wgLang ;
    # Always return a string so that every exit has the same type.
    if ( !$this->mOptions->getUseCategoryMagic() ) return "" ; # Doesn't use categories at all

    $cns = Namespace::getCategory() ;
    if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page

    $r = "<br style=\"clear:both;\"/>\n";

    # Take the skin from the parser options like the other methods of this
    # class do; the parser is meant to stay away from $wgUser.
    $sk =& $this->mOptions->getSkin() ;

    $articles = array() ;
    $children = array() ;
    $data = array () ;

    # FIXME: add limits
    $t = wfStrencode( $this->mTitle->getDBKey() );
    $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
    $res = wfQuery ( $sql, DB_READ ) ;
    while ( $x = wfFetchObject ( $res ) ) $data[] = $x ;
    wfFreeResult ( $res ) ;

    # For all pages that link to this category
    foreach ( $data AS $x )
    {
        $t = $wgLang->getNsText ( $x->cur_namespace ) ;
        if ( $t != "" ) $t .= ":" ;
        $t .= $x->cur_title ;

        if ( $x->cur_namespace == $cns ) {
            array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory
        } else {
            array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
        }
    }

    # Showing subcategories
    if ( count ( $children ) > 0 ) {
        $r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
        $r .= implode ( ", " , $children ) ;
    }

    # Showing pages in this category
    if ( count ( $articles ) > 0 ) {
        $ti = $this->mTitle->getText() ;
        $h = wfMsg( "category_header", $ti );
        $r .= "<h2>{$h}</h2>\n" ;
        $r .= implode ( ", " , $articles ) ;
    }

    return $r ;
}
377
# Returns the list of attribute names that may be kept on an HTML tag
# (no scripting, etc.). The list is assembled from per-purpose groups; the
# order of the result is the order of the groups below.
function getHTMLattrs ()
{
    $general = array( "title", "align", "lang", "dir", "width", "height", "bgcolor" );
    $br = array( "clear" );
    $hr = array( "noshade" );
    $quote = array( "cite" ); # BLOCKQUOTE, Q
    $font = array( "size", "face", "color" );
    # For various lists, mostly deprecated but safe
    $lists = array( "type", "start", "value", "compact" );
    $tables = array(
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan"
    );
    $css = array( "id", "class", "name", "style" ); # For CSS

    return array_merge( $general, $br, $hr, $quote, $font, $lists, $tables, $css ) ;
}
394
# Filters the attribute part of an HTML tag.
#
#   $t  raw attribute text, e.g.  border="1" onclick="x()"
#
# Attributes whose name is not returned by getHTMLattrs() are removed.
# If the remaining text has a style attribute containing "expression",
# a URL scheme or "url(", every attribute is dropped. Returns the trimmed
# result ("" when nothing is left).
function fixTagAttributes ( $t )
{
    if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

    # Strip non-approved attributes from the tag. A callback is used rather
    # than the /e modifier: /e evaluates the matched text as PHP code (so an
    # attribute value could inject code), garbles single-quoted values, and
    # is not available in current PHP versions.
    $t = preg_replace_callback(
        "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
        array( $this, 'fixTagAttributesCallback' ),
        $t );

    # Strip javascript "expression" from stylesheets. Brute force approach:
    # If anything offensive is found, all attributes of the HTML tag are dropped
    if( preg_match(
        "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
        wfMungeToUtf8( $t ) ) )
    {
        $t="";
    }

    return trim ( $t ) ;
}

# Helper for fixTagAttributes(): decides the fate of one attribute.
#   $m[1] attribute name, $m[3] value including its quotes (may be absent)
# Returns name or name=value for an approved name, "" otherwise.
/* private */ function fixTagAttributesCallback ( $m )
{
    if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
        return '' ;
    }
    $value = isset( $m[3] ) ? $m[3] : '' ;
    if ( $value !== '' ) {
        return $m[1] . '=' . $value ;
    }
    return $m[1] ;
}
417
418 /* interface with html tidy, used if $wgUseTidy = true */
# Pipes $text through the external tidy program.
#
#   $text  HTML fragment; it is wrapped in a minimal XHTML document first
#
# Returns whatever tidy wrote to its standard output. If that is empty
# although the input was not, an error heading followed by the escaped
# input is returned instead.
function tidy ( $text ) {
    global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
    global $wgInputEncoding, $wgOutputEncoding;
    $cleansource = '';

    # Work on a copy: appending to the global would pile up another
    # encoding switch on every call within the same request.
    $opts = $wgTidyOpts;
    switch(strtoupper($wgOutputEncoding)) {
        case 'ISO-8859-1':
            $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
            break;
        case 'UTF-8':
            $opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
            break;
        default:
            $opts .= ' -raw';
    }

    $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
        '<head><title>test</title></head><body>'.$text.'</body></html>';
    $descriptorspec = array(
        0 => array("pipe", "r"),              # tidy's stdin: we write to it
        1 => array("pipe", "w"),              # tidy's stdout: we read from it
        2 => array("file", "/dev/null", "a")  # tidy's stderr is discarded
    );
    $process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
    if (is_resource($process)) {
        fwrite($pipes[0], $text);
        fclose($pipes[0]);
        while (!feof($pipes[1])) {
            $cleansource .= fgets($pipes[1], 1024);
        }
        fclose($pipes[1]);
        proc_close($process);
    }
    if( $cleansource == '' && $text != '') {
        return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
    } else {
        return $cleansource;
    }
}
458
# Converts wiki table markup to HTML, line by line:
#   {| attrs    opens a table         |}         closes it
#   |- attrs    starts a new row      |+ text    caption
#   | a || b    data cells            ! a !! b   header cells
# Inside a cell, text before the first "|" is taken as its attributes.
# Tables may nest; one entry per open table is kept on each stack.
# Lines outside any table, and lines without table markup, are left as is.
#
#   $t  text to convert
#
# Returns the converted text; tables still open at the end are closed.
function doTableStuff ( $t )
{
    $t = explode ( "\n" , $t ) ;
    $td = array () ; # Is currently a cell tag open?
    $ltd = array () ; # Tag name of that cell: td, th or caption
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    foreach ( $t AS $k => $x )
    {
        $x = trim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        $start = substr ( $x , 0 , 2 ) ;
        if ( "{|" == $start )
        {
            # Attributes begin right after the two marker characters;
            # fixTagAttributes() trims surrounding whitespace itself.
            $t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , "" ) ;
        }
        else if ( count ( $td ) == 0 ) { } # Don't do any of the following
        else if ( "|}" == $start )
        {
            $z = "</table>\n" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
        }
        else if ( "|-" == $start ) # Allows for |---------------
        {
            # Drop the bar and every dash; what is left are row attributes.
            $x = ltrim ( substr ( $x , 1 ) , "-" ) ;
            $z = "" ;
            $l = array_pop ( $ltd ) ;
            if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
            if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
            array_pop ( $ltr ) ;
            $t[$k] = $z ;
            array_push ( $tr , false ) ;
            array_push ( $td , false ) ;
            array_push ( $ltd , "" ) ;
            array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
        }
        else if ( "|" == $fc || "!" == $fc || "|+" == $start ) # Cells and caption
        {
            if ( "|+" == $start )
            {
                $fc = "+" ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
            $after = explode ( "||" , $after ) ;
            $t[$k] = "" ;
            foreach ( $after AS $theline )
            {
                $z = "" ;
                if ( $fc != "+" )
                {
                    # A cell needs an open row; a caption does not.
                    $tra = array_pop ( $ltr ) ;
                    if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , "" ) ;
                }

                $l = array_pop ( $ltd ) ;
                if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
                if ( $fc == "|" ) $l = "td" ;
                else if ( $fc == "!" ) $l = "th" ;
                else if ( $fc == "+" ) $l = "caption" ;
                else $l = "" ;
                array_push ( $ltd , $l ) ;
                $y = explode ( "|" , $theline , 2 ) ;
                if ( count ( $y ) == 1 ) $cell = "{$z}<{$l}>{$y[0]}" ;
                else $cell = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
                $t[$k] .= $cell ;
                array_push ( $td , true ) ;
            }
        }
    }

    # Closing open cell, tr && table. The cell is closed with the tag it
    # was opened with, which may be th or caption rather than td.
    while ( count ( $td ) > 0 )
    {
        $l = array_pop ( $ltd ) ;
        if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
        if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
        array_pop ( $ltr ) ;
        $t[] = "</table>" ;
    }

    $t = implode ( "\n" , $t ) ;
    # $t = $this->removeHTMLtags( $t );
    return $t ;
}
558
559 # Parses the text and adds the result to the strip state
560 # Returns the strip tag
# Runs $text through strip() and internalParse() (as a non-main text) and
# stores the result as a strip item in $this->mStripState.
#
#   $text       wikitext to process
#   $linestart  handed to internalParse(); when true the stored result is
#               prefixed with a newline
#   $args       handed to internalParse()
#
# Returns the marker that stands for the stored result.
function stripParse( $text, $linestart, $args )
{
    $stripped = $this->strip( $text, $this->mStripState );
    $parsed = $this->internalParse( $stripped, $linestart, $args, false );
    $item = $linestart ? "\n" . $parsed : $parsed;
    return $this->insertStripItem( $item, $this->mStripState );
}
570
# Applies the wikitext transformations to already stripped text. The
# sequence of the steps below is significant and must be kept.
#
#   $text       stripped wikitext
#   $linestart  accepted for the callers; not used in this method
#   $args       handed to replaceVariables()
#   $isMain     handed to formatHeadings()
#
# Returns the transformed text. The category listing is appended only the
# first time this method runs on a given parser object.
function internalParse( $text, $linestart, $args = array(), $isMain=true )
{
    $fname = "Parser::internalParse";
    wfProfileIn( $fname );

    $text = $this->removeHTMLtags( $text );
    $text = $this->replaceVariables( $text, $args );

    # Five or more dashes at the start of a line become a horizontal rule.
    $text = preg_replace( "/(^|\n)-----*/", "\\1<hr/>", $text );

    $text = $this->doHeadings( $text );
    if ( $this->mOptions->getUseDynamicDates() ) {
        global $wgDateFormatter;
        $dateFormat = $this->mOptions->getDateFormat();
        $text = $wgDateFormatter->reformat( $dateFormat, $text );
    }
    $text = $this->doAllQuotes( $text );
    $text = $this->replaceExternalLinks( $text );

    # The internal link pass is run twice in a row.
    for ( $pass = 0; $pass < 2; ++$pass ) {
        $text = $this->replaceInternalLinks ( $text );
    }
    //$text = $this->doTokenizedParser ( $text );
    $text = $this->doTableStuff ( $text ) ;
    $text = $this->magicISBN( $text );
    $text = $this->magicRFC( $text );
    $text = $this->formatHeadings( $text, $isMain );

    $skin =& $this->mOptions->getSkin();
    $text = $skin->transformContent( $text );

    if ( !isset ( $this->categoryMagicDone ) ) {
        $text .= $this->categoryMagic () ;
        $this->categoryMagicDone = true ;
    }

    wfProfileOut( $fname );
    return $text;
}
606
607
# Turns lines of the form "== text ==" into <h2>text</h2>, for one to six
# equals signs on each side. The longest markers are handled first, so a
# level-3 heading is not picked up by the level-2 or level-1 pattern.
/* private */ function doHeadings( $text )
{
    $level = 6;
    while ( $level >= 1 ) {
        $marks = str_repeat( "=", $level );
        $pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $text = preg_replace( $pattern, $replacement, $text );
        --$level;
    }
    return $text;
}
617
# Applies doQuotes() to each "\r\n"-separated line of $text, so that quote
# markup never carries over from one such line to the next.
#
# The lines are joined with the same separator again; nothing is added
# after the last line, so the text keeps its original ending.
#
# NOTE(review): other methods of this class split on "\n" only; confirm
# that "\r\n" is really the line separator at this stage.
/* private */ function doAllQuotes( $text )
{
    $lines = explode( "\r\n", $text );
    foreach ( $lines as $i => $line ) {
        $lines[$i] = $this->doQuotes ( "", $line, "" );
    }
    return implode( "\r\n", $lines );
}
627
# Recursive worker for quote markup: '' toggles <em>, ''' toggles <strong>.
#
#   $pre   text carried over from the caller that still has to be wrapped
#   $text  remaining input
#   $mode  what is open at this point: "", "em", "strong", "emstrong",
#          "strongem" or "both"
#
# Each call consumes the first run of quotes in $text and recurses on the
# remainder; when no run is left, whatever is still open is closed.
/* private */ function doQuotes( $pre, $text, $mode )
{
    if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
        # No quote run left: wrap the remaining text according to the mode.
        $text_strong = ($text == "") ? "" : "<strong>{$text}</strong>";
        $text_em = ($text == "") ? "" : "<em>{$text}</em>";
        switch ( $mode ) {
            case "":
                return $pre . $text;
            case "em":
                return $pre . $text_em;
            case "strong":
                return $pre . $text_strong;
            case "strongem":
                return (($pre == "") && ($text == "")) ? "" : "<strong>{$pre}{$text_em}</strong>";
            default:
                return (($pre == "") && ($text == "")) ? "" : "<em>{$pre}{$text_strong}</em>";
        }
    }

    $m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
    $m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";

    if ( substr ($m[2], 0, 1) == "'" ) {
        # Three quotes: a <strong> toggle. Drop the third quote.
        $m[2] = substr ($m[2], 1);
        switch ( $mode ) {
            case "em":
                return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "emstrong" );
            case "strong":
                return $m1_strong . $this->doQuotes ( "", $m[2], "" );
            case "emstrong":
            case "both":
                return $this->doQuotes ( "", $pre.$m1_strong.$m[2], "em" );
            case "strongem":
                return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( "", $m[2], "em" );
            default:
                return $m[1] . $this->doQuotes ( "", $m[2], "strong" );
        }
    }

    # Two quotes: an <em> toggle.
    switch ( $mode ) {
        case "strong":
            return $this->doQuotes ( $m[1], $m[2], ($m[1] == "") ? "both" : "strongem" );
        case "em":
            return $m1_em . $this->doQuotes ( "", $m[2], "" );
        case "emstrong":
            return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( "", $m[2], "strong" );
        case "strongem":
        case "both":
            return $this->doQuotes ( "", $pre.$m1_em.$m[2], "strong" );
        default:
            return $m[1] . $this->doQuotes ( "", $m[2], "em" );
    }
}
675
676 # Note: we have to do external links before the internal ones,
677 # and otherwise take great care in the order of things here, so
678 # that we don't end up interpreting some URLs twice.
679
# Runs subReplaceExternalLinks() once per supported protocol, in the order
# listed. The flag is the $autonumber argument of that method; it is set
# for http and https only.
/* private */ function replaceExternalLinks( $text )
{
    $fname = "Parser::replaceExternalLinks";
    wfProfileIn( $fname );

    $protocols = array(
        "http" => true,
        "https" => true,
        "ftp" => false,
        "irc" => false,
        "gopher" => false,
        "news" => false,
        "mailto" => false
    );
    foreach ( $protocols as $protocol => $autonumber ) {
        $text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
    }

    wfProfileOut( $fname );
    return $text;
}
694
# Links the URLs of one protocol.
#
#   $s           text to process
#   $protocol    scheme without the colon, e.g. "http"
#   $autonumber  when true, a bracketed URL without a label is shown as a
#                running number, and (if the options allow external images)
#                bare image URLs become images
#
# Bare URLs are handled first, with a placeholder in place of the protocol
# so that a generated link is not matched a second time; bracketed links
# of the forms [url] and [url label] follow. Returns the processed text.
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
    $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
    $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

    # this is the list of separators that should be ignored if they
    # are the last character of an URL but that should be included
    # if they occur within the URL, e.g. "go to www.foo.com, where .."
    # in this case, the last comma should not become part of the URL,
    # but in "www.foo.com/123,2342,32.htm" it should.
    $sep = ",;\.:";
    $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
    $images = "gif|png|jpg|jpeg";

    # PLEASE NOTE: The curly braces { } are not part of the regex,
    # they only tell PHP where to insert the variable's content.
    $imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
        "((?i){$images})([^{$uc}]|$)/";
    $barePattern = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)([^{$uc}{$sep}]|[{$sep}]|$)/";

    $skin =& $this->mOptions->getSkin();

    if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
        $imageHtml = $skin->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
        $s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
    }

    $bareReplacement = "\\1" . "<a href=\"{$unique}:\\3\"" .
        $skin->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML( "{$unique}:\\3" ) ) .
        ">" . wfEscapeHTML( "{$unique}:\\3" ) . "</a>\\5";
    $s = preg_replace( $barePattern, $bareReplacement, $s );
    $s = str_replace( $unique, $protocol, $s );

    # Bracketed links: split at every "[protocol:"; the leading blank
    # guarantees a first piece even when the text starts with a link.
    $pieces = explode( "[{$protocol}:", " " . $s );
    $s = array_shift( $pieces );
    $s = substr( $s, 1 );

    $urlOnly = "/^([{$uc}{$sep}]+)](.*)\$/sD";
    $urlAndLabel = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

    foreach ( $pieces as $piece ) {
        if ( preg_match( $urlOnly, $piece, $m ) ) {
            $link = "{$protocol}:{$m[1]}";
            $trail = $m[2];
            if ( $autonumber ) {
                $text = "[" . ++$this->mAutonumber . "]";
            } else {
                $text = wfEscapeHTML( $link );
            }
        } else if ( preg_match( $urlAndLabel, $piece, $m ) ) {
            $link = "{$protocol}:{$m[1]}";
            $text = $m[2];
            $trail = $m[3];
        } else {
            # Not a well-formed bracketed link: put it back unchanged.
            $s .= "[{$protocol}:" . $piece;
            continue;
        }

        $labelIsUrl = ( $link == $text ) ||
            preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link );
        # Expand the URL for printable version unless the label already shows it
        $paren = $labelIsUrl
            ? ""
            : "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";

        $la = $skin->getExternalLinkAttributes( $link, $text );
        $s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
    }
    return $s;
}
761
762
# Replaces [[...]] links in $s with the HTML produced by the skin.
#
# Language links and category links are recorded on the parser output /
# link cache and leave no link in the text; image, media, special and
# self links get their own rendering; everything else goes through
# makeLinkObj(). Text that does not form a valid link is put back
# unchanged. Returns the processed text.
/* private */ function replaceInternalLinks( $s )
{
    global $wgLang, $wgLinkCache;
    global $wgNamespacesWithSubpages;
    static $fname = "Parser::replaceInternalLink" ;
    wfProfileIn( $fname );

    wfProfileIn( "$fname-setup" );
    static $tc = FALSE;
    # the % is needed to support urlencoded titles as well
    if ( !$tc ) { $tc = Title::legalChars() . "#%"; }
    $sk =& $this->mOptions->getSkin();

    $a = explode( "[[", " " . $s );
    $s = array_shift( $a );
    $s = substr( $s, 1 );

    # Match a link having the form [[namespace:link|alternate]]trail
    static $e1 = FALSE;
    if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
    # Match the end of a line for a word that's not followed by whitespace,
    # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
    #$e2 = "/^(.*)\\b(\\w+)\$/suD";
    #$e2 = "/^(.*\\s)(\\S+)\$/suD";
    static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD';

    # Special and Media are pseudo-namespaces; no pages actually exist in them
    static $image = FALSE;
    static $special = FALSE;
    static $media = FALSE;
    static $category = FALSE;
    if ( !$image ) { $image = Namespace::getImage(); }
    if ( !$special ) { $special = Namespace::getSpecial(); }
    if ( !$media ) { $media = Namespace::getMedia(); }
    if ( !$category ) { $category = Namespace::getCategory(); }

    $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

    $useLinkPrefix = $wgLang->linkPrefixExtension();

    wfProfileOut( "$fname-setup" );

    foreach ( $a as $line ) {
        # The link prefix is the word directly in front of this link, i.e.
        # at the end of the output produced so far. Determine it afresh for
        # every link instead of reusing the first link's prefix.
        if ( $useLinkPrefix && preg_match( $e2, $s, $m ) ) {
            $prefix = $m[2];
            $s = $m[1];
        } else {
            $prefix = "";
        }

        if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
            $text = $m[2];
            # fix up urlencoded title texts
            if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]);
            $trail = $m[3];
        } else { # Invalid form; output directly
            $s .= $prefix . "[[" . $line ;
            continue;
        }

        /* Valid link forms:
        Foobar -- normal
        :Foobar -- override special treatment of prefix (images, language links)
        /Foobar -- convert to CurrentPage/Foobar
        /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
        */
        $c = substr($m[1],0,1);
        $noforce = ($c != ":");
        if( $c == "/" ) { # subpage
            if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
                $m[1]=substr($m[1],1,strlen($m[1])-2);
                $noslash=$m[1];
            } else {
                $noslash=substr($m[1],1);
            }
            if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
                $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
                if( "" == $text ) {
                    $text= $m[1];
                } # this might be changed for ugliness reasons
            } else {
                $link = $noslash; # no subpage allowed, use standard link
            }
        } elseif( $noforce ) { # no subpage
            $link = $m[1];
        } else {
            $link = substr( $m[1], 1 );
        }
        $wasblank = ( "" == $text );
        if( $wasblank )
            $text = $link;

        $nt = Title::newFromText( $link );
        if( !$nt ) {
            $s .= $prefix . "[[" . $line;
            continue;
        }
        $ns = $nt->getNamespace();
        $iw = $nt->getInterWiki();
        if( $noforce ) {
            if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
                array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
                $s .= $prefix . $trail ;
                # If only whitespace has been produced so far, drop it.
                # (Assign, do not append: appending would duplicate the
                # whole output accumulated up to here.)
                $s = (trim($s) == '')? '': $s;
                continue;
            }
            if ( $ns == $image ) {
                $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
                $wgLinkCache->addImageLinkObj( $nt );
                continue;
            }
            if ( $ns == $category ) {
                $t = $nt->getText() ;
                $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

                $wgLinkCache->suspend(); # Don't save in links/brokenlinks
                $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix );
                $wgLinkCache->resume();

                $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
                $wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
                $this->mOutput->mCategoryLinks[] = $t ;
                $s .= $prefix . $trail ;
                continue;
            }
        }
        # Strict comparison: a "#" at offset 0 is a fragment too.
        if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
            ( strpos( $link, "#" ) === false ) ) {
            # Self-links are handled specially; generally de-link and change to bold.
            $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail );
            continue;
        }

        if( $ns == $media ) {
            $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
            $wgLinkCache->addImageLinkObj( $nt );
            continue;
        } elseif( $ns == $special ) {
            $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
            continue;
        }
        $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );
    }
    # Single exit: exactly one wfProfileOut() for the wfProfileIn() above.
    wfProfileOut( $fname );
    return $s;
}
918
919 # Some functions here used by doBlockLevels()
920 #
# Returns the closing tag (plus newline) for the section named in
# $this->mLastSection, or "" when none is recorded. In either case the
# recorded section is forgotten and $this->mInPre is switched off.
/* private */ function closeParagraph()
{
    $closing = ( '' != $this->mLastSection )
        ? "</" . $this->mLastSection . ">\n"
        : "";
    $this->mLastSection = "";
    $this->mInPre = false;
    return $closing;
}
931 # getCommon() returns the length of the longest common substring
932 # of both arguments, starting at the beginning of both.
933 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the
# number of leading characters in which both strings agree (0 if either
# string is empty).
/* private */ function getCommon( $st1, $st2 )
{
    $shorter = min( strlen( $st1 ), strlen( $st2 ) );

    # Square-bracket offsets work in every PHP version; the curly-brace
    # form is a syntax error in current ones.
    for ( $i = 0; $i < $shorter; ++$i ) {
        if ( $st1[$i] != $st2[$i] ) { break; }
    }
    return $i;
}
945 # These next three functions open, continue, and close the list
946 # element appropriate to the prefix character passed into them.
947 #
# Closes the current paragraph and opens the list that belongs to the
# prefix character $char ("*", "#", ":" or ";"). For ";" the definition
# term is additionally flagged as open. Any other character yields only
# an error comment (the paragraph is still closed internally).
/* private */ function openList( $char )
{
    $closing = $this->closeParagraph();

    switch ( $char ) {
        case "*":
            return $closing . "<ul><li>";
        case "#":
            return $closing . "<ol><li>";
        case ":":
            return $closing . "<dl><dd>";
        case ";":
            $this->mDTopen = true;
            return $closing . "<dl><dt>";
        default:
            return "<!-- ERR 1 -->";
    }
}
963
/* private */ function nextItem( $char )
{
	# Bulleted and numbered lists share the same item separator
	if ( "*" == $char || "#" == $char ) {
		return "</li><li>";
	}

	# Anything that is not a definition-list prefix is an error
	if ( ":" != $char && ";" != $char ) {
		return "<!-- ERR 2 -->";
	}

	# Definition lists: close whichever of <dt>/<dd> is currently open,
	# then open the element matching the new prefix and record whether
	# a <dt> is now open.
	$closing = $this->mDTopen ? "</dt>" : "</dd>";
	$opensTerm = ( ";" == $char );
	$this->mDTopen = $opensTerm;

	return $closing . ( $opensTerm ? "<dt>" : "<dd>" );
}
980
/* private */function closeList( $char )
{
	# Returns the closing markup for the list type identified by $char,
	# followed by a newline. Unknown characters (including ";") yield an
	# error comment without the trailing newline.
	switch ( $char ) {
		case "*":
			$closing = "</li></ul>";
			break;
		case "#":
			$closing = "</li></ol>";
			break;
		case ":":
			# A definition list ends inside either a <dt> or a <dd>
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$closing = "</dt></dl>";
			} else {
				$closing = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $closing . "\n";
}
996
# Converts line-oriented wiki markup into block-level HTML.
#
# The text is processed line by line. Lines starting with any run of the
# characters * # : ; open, continue or close (possibly nested) lists;
# other lines are wrapped in <p> or <pre> sections unless they contain
# one of the block-level tags matched below.
#
# $text:      text to process
# $linestart: when false, the first line is copied to the output
#             unprocessed
# Returns: the processed text
#
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );

	# Parsing through the text line by line.  The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	$paragraphStack = false;
	# Defined up front so the list-closing loop after the main loop never
	# reads an unset variable when there are no lines to process.
	$pref2 = '';

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, "*#:;" );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is the prefix with ";" mapped to ":"; it is what
			# gets remembered as $lastPrefix and what is passed to
			# closeList(), which has no ";" case of its own.
			$pref2 = str_replace( ";", ":", $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				$output .= $this->splitDefinitionTerm( $t );
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close every level of the previous prefix that is not shared
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open every level of the new prefix that is not shared
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ";" == $char ) {
					$output .= $this->splitDefinitionTerm( $t );
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match("/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<td|<p|<ul|<li)/i", $t );
			$closematch = preg_match(
				"/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|".
				"<div|<\\/div|<hr|<\\/td|<\\/pre|<\\/p|".$uniq_prefix."-pre|<\\/li|<\\/ul)/i", $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than an offset read: $t may be empty
				# here, and reading offset 0 of an empty string raises
				# an "uninitialized string offset" diagnostic.
				if ( " " == substr( $t, 0, 1 ) and trim($t) != '' ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						# Blank line: the paragraph markup is held back
						# in $paragraphStack until the next line shows
						# what follows.
						if ( $paragraphStack ) {
							$output .= $paragraphStack.'<br/>';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = "<p>";
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# Lines are emitted only while no paragraph markup is pending
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close any lists still open after the last line
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( "" != $this->mLastSection ) {
		$output .= "</" . $this->mLastSection . ">";
		$this->mLastSection = "";
	}

	wfProfileOut( $fname );
	return $output;
}

# Helper for doBlockLevels(): splits a "; term : definition" line.
#
# If $t contains a ":" preceded by whitespace or &nbsp;, $t is reduced to
# the definition part and the term followed by the markup switching from
# <dt> to <dd> is returned. Otherwise $t is left alone and an empty string
# is returned.
# FIXME: This is not foolproof. Something better in Tokenizer might help.
#
/* private */ function splitDefinitionTerm( &$t ) {
	if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
		$t = $match[2];
		return $match[1] . $this->nextItem( ":" );
	}
	return '';
}
1155
# Returns the current value of the variable identified by the magic word
# ID $index, or NULL when the ID is not one of the variables handled here.
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;

	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( "m" );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date("n") );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date("n") );
			break;
		case MAG_CURRENTDAY:
			$value = date("j");
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one
			# (patch by Dori)
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date("w") is 0-based, hence the +1
			$value = $wgLang->getWeekdayName( date("w")+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( "Y" );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}

	return $value;
}
1189
# Rebuilds $this->mVariables from scratch: for every ID listed in
# $wgVariableIDs, the matching magic word adds the variable's current
# value to the array.
function initialiseVariables()
{
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		$magicWord =& MagicWord::get( $variableId );
		$currentValue = $this->getVariableValue( $variableId );
		$magicWord->addToArray( $this->mVariables, $currentValue );
	}
}
1199
# Expands {{...}} constructs and, for HTML output, {{{...}}} template
# arguments in $text.
#
# $text: text to process
# $args: arguments visible to {{{...}}} substitution while this call
#        is active
# Returns: the text with the substitutions applied
#
/* private */ function replaceVariables( $text, $args = array() )
{
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}
	$titleChars = Title::legalChars();

	# This function is called recursively. To keep track of arguments we need a stack:
	array_push( $this->mArgStack, $args );

	# The substitution callbacks are named global functions; the current
	# parser is published through $wgCurParser.
	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	# Argument substitution
	if ( $this->mOutputType == OT_HTML ) {
		$text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text );
	}
	# Double brace substitution
	$regex = "/(\\n?){{([$titleChars]*)(\\|.*?|)}}/s";
	$text = preg_replace_callback( $regex, "wfBraceSubstitution", $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1232
# Computes the replacement for one {{...}} match found by
# replaceVariables().
#
# $matches: regex match array; [0] is the whole match, [1] an optional
#           newline before the braces, [2] the part before the first "|",
#           [3] the rest (either empty or starting with "|")
# Returns: the replacement text, or the original match when nothing
#          applicable was found
#
# The candidates are tried in this order: leftover {{{...}}}, SUBST,
# MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, internal variables, and finally
# loading a page from the template namespace.
function braceSubstitution( $matches )
{
	global $wgLinkCache, $wgLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== "" ) {
		$args = explode( "|", substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	if ( strpos( $matches[0], "{{{" ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
		$text = $inputArgs[$part1];
		$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;

				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored
		# under their trimmed name, all others under 1, 2, 3, ...
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, "=" );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				# trim() always yields a string, so no further checks
				# on $name / $value are needed.
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, (bool)$newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1431
# Triple brace replacement -- used for template arguments
#
# $matches: regex match array; [0] is the whole {{{...}}} match, [1] an
#           optional preceding newline, [2] the argument name
# Returns: the parsed argument value when the innermost argument frame
#          defines it, otherwise the match unchanged
function argSubstitution( $matches )
{
	$argName = trim( $matches[2] );
	# Arguments of the innermost replaceVariables() call
	$frameArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $frameArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $frameArgs[$argName], (bool)$matches[1], array() );
}
1446
# Counts one more inclusion of the entity keyed by $dbk.
# Returns true if the function is allowed to include this entity, i.e.
# while the running count does not exceed MAX_INCLUDE_REPEAT.
function incrementIncludeCount( $dbk )
{
	$count = array_key_exists( $dbk, $this->mIncludeCount )
		? $this->mIncludeCount[$dbk] + 1
		: 1;
	$this->mIncludeCount[$dbk] = $count;

	return $count <= MAX_INCLUDE_REPEAT;
}
1459
1460
# Cleans up HTML, removes dangerous tags and attributes
#
# HTML comments are removed, tags on the whitelist are kept with their
# attributes passed through fixTagAttributes(), and everything else that
# starts with "<" is escaped. Unless $wgUseTidy is set, tag nesting is
# checked as well and tags left open are closed at the end. When
# $wgUserHtml is off the whitelist is empty.
#
# $text: text to clean
# Returns: the cleaned text
#
/* private */ function removeHTMLtags( $text )
{
	global $wgUseTidy, $wgUserHtml;
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			"b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
			"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
			"strike", "strong", "tt", "var", "div", "center",
			"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
			"ruby", "rt" , "rb" , "rp", "p"
		);
		$htmlsingle = array(
			"br", "hr", "li", "dt", "dd"
		);
		$htmlnest = array( # Tags that can be nested--??
			"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
			"dl", "font", "big", "small", "sub", "sup"
		);
		$tabletags = array( # Can only appear inside table
			"td", "th", "tr"
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): $htmlattrs is not read below; presumably
	# fixTagAttributes() obtains the list itself -- confirm before
	# removing this call.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU", "", $text );

	# Splits a fragment following "<" into: optional slash, tag name,
	# attributes, closing bracket (optionally "/>") and trailing text.
	$tagPattern = "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/";

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
			} else {
				# Not a tag at all; it is escaped further down
				$slash = $t = $params = $brace = $rest = "";
			}

			$badtag = 0 ;
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( ! in_array( $t, $htmlsingle ) ) {
						# A paired tag may only close the innermost open
						# tag. The popped name is compared directly with
						# $t so that a mismatched closing tag is rejected.
						$ot = count( $tagstack ) ? array_pop( $tagstack ) : "";
						if ( $ot != $t ) {
							# Mismatch: put the open tag back
							if( !empty( $ot ) ) array_push( $tagstack, $ot );
							$badtag = 1;
						}
					}
					if ( ! $badtag ) {
						if ( $t == "table" ) {
							# Restore the stack saved when the table opened
							$tagstack = array_pop( $tablestack );
						}
						$newparams = "";
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# A table starts a fresh nesting context
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			# Unknown or misplaced tag: show it as literal text
			$text .= "&lt;" . str_replace( ">", "&gt;", $x);
		}
		# Close off any remaining tags
		while ( $t = array_pop( $tagstack ) ) {
			$text .= "</$t>\n";
			if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
			} else {
				$slash = $t = $params = $brace = $rest = "";
			}
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( ">", "&gt;", $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= "&lt;" . str_replace( ">", "&gt;", $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1577
1578
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text:   HTML text containing <h1>..<h6> headlines
 * $isMain: the table of contents is only inserted when this is true
 * Returns: the text with the rebuilt headlines (and TOC, if shown)
 *
 */

/* private */ function formatHeadings( $text, $isMain=true )
{
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$numMatches = preg_match_all( "/<H([1-6])(.*?" . ">)(.*?)<\/H[1-6]>/i", $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}


	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();
	$sublevelCount = array();
	# $refers counts how often each canonized headline has been seen;
	# $refcount stores that count per headline index.
	$refers = array();
	$refcount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = "";
		$head[$headlineCount] = "";
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if( !isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level] = 0;
		}
		$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build the dotted number from the counters of all levels
			# up to this one, skipping levels that were never used
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= ".";
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
		# strip out urlencoded &nbsp; (inserted for french spaces, e.g. first space in 'something : something')
		$canonized_headline = str_replace('%C2%A0','_', $canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline] = 0;
		}
		$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . " " . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . " " . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= "_" . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink ) {
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?" . ">.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text used to be added here
		# when section editing is enabled. It is disabled because it
		# broke block formatting, for example a bullet point in the top
		# line:
		# $full .= $sk->editSectionLink(0);
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1777
# Turns every "ISBN <number>" in $text into a link to the Booksources
# special page.
#
# The number is the run of characters from 0-9, "-" and A-Z that follows
# "ISBN " (after optional extra spaces). Occurrences without such a run
# are left untouched.
#
# $text: text to process
# Returns: the text with ISBN links inserted
#
/* private */ function magicISBN( $text )
{
	# A space is prepended so the first piece is never empty; it is
	# removed again below. explode() is used because the separator is a
	# literal string.
	$a = explode( "ISBN ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	foreach ( $a as $x ) {
		# Leading spaces are remembered so an unlinked occurrence can
		# be reproduced exactly.
		$blank = str_repeat( " ", strspn( $x, " " ) );
		$x = (string)substr( $x, strlen( $blank ) );

		# The ISBN is the leading run of valid characters. strspn()
		# stops at the end of the string, so an empty remainder is
		# handled without reading past it.
		$isbnLength = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $isbnLength );
		$x = (string)substr( $x, $isbnLength );

		$num = str_replace( "-", "", $isbn );
		$num = str_replace( " ", "", $num );

		if ( "" == $num ) {
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
			$text .= "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
# Turns every "RFC <number>" in $text into an external link.
#
# The number is the run of digits that follows "RFC " (after optional
# extra spaces); the link target is the "rfcurl" message with "$1"
# replaced by the number. Occurrences without digits are left untouched.
#
# $text: text to process
# Returns: the text with RFC links inserted
#
/* private */ function magicRFC( $text )
{
	# A space is prepended so the first piece is never empty; it is
	# removed again below. explode() is used because the separator is a
	# literal string.
	$a = explode( "RFC ", " $text" );
	if ( count ( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1);
	$valid = "0123456789";

	foreach ( $a as $x ) {
		# Leading spaces are remembered so an unlinked occurrence can
		# be reproduced exactly.
		$blank = str_repeat( " ", strspn( $x, " " ) );
		$x = (string)substr( $x, strlen( $blank ) );

		# The RFC number is the leading run of digits. strspn() stops
		# at the end of the string, so an empty remainder is handled
		# without reading past it.
		$rfcLength = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $rfcLength );
		$x = (string)substr( $x, $rfcLength );

		if ( "" == $rfc ) {
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( "rfcurl" );
			$url = str_replace( "$1", $rfc, $url);
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
1844
# Transforms wiki markup before it is saved: normalises line endings and
# <br> variants, then runs pstPass2() on the text with strip() applied
# before and unstrip() after.
#
# $text:       wiki markup as submitted
# $title:      title of the page being saved
# $user:       user saving the page
# $options:    parser options to use
# $clearState: when true, the parser state is reset first
# Returns: the altered wiki markup
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line endings
	$text = str_replace( "\r\n", "\n", $text );

	# now with regexes: first the clearing <br> variants, then plain <br>
	$text = preg_replace(
		"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i",
		'<br style="clear:both;"/>',
		$text
	);
	$text = preg_replace( "/<br *?>/i", "<br/>", $text );

	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	return $this->unstrip( $text, $stripState );
}
1871
# Second pass of the pre-save transform: expands {{subst:...}},
# signatures (~~~, ~~~~, ~~~~~) and the "pipe trick" context links, then
# trims trailing whitespace.
#
# $text: wiki markup
# $user: user whose name and nickname are used for signatures
# Returns: the altered wiki markup
#
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The tilde runs are literal strings, so plain string replacement is
	# used: in a regex replacement string, "$1" or "\1" inside the user
	# name, nickname or date would be treated as a backreference.
	# Longest run first, so ~~~~~ is not consumed by the shorter forms.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~~", $d, $text );
	$text = str_replace( "~~~~", "$userLink $d", $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# The context is the parenthesised suffix of the current page title,
	# if it has one
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1942
# Set up some variables which are usually set up in parse()
# so that an external function can call some class members with confidence
#
# $title:      title the parser should work with
# $options:    parser options to use
# $outputType: output type to set (stored in mOutputType)
# $clearState: when true, the parser state is reset as well
function startExternalParse( &$title, $options, $outputType, $clearState = true )
{
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
1954
# Expands variables in a message text, with the output type set to
# OT_MSG and $wgTitle as the title.
#
# $text:    message text
# $options: parser options to use
# Returns: the expanded text, or $text unchanged when this function is
#          already running further up the call stack
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
1974 }
1975
# Holds the result of a parse: the output text, the language and category
# links collected along the way, and two values used for caching.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mTouched; # Used for caching

	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mTouched = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getTouched() { return $this->mTouched; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each passes the member and the new value to wfSetVar()
	# and returns its result
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setTouched( $t ) { return wfSetVar( $this->mTouched, $t ); }

	# Folds the links and the old-magic flag of another ParserOutput into
	# this one. The text and the touched value are not merged.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Category links are merged with the other object's category
		# links, not with this object's language links
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2009
# Bundle of settings that steer a parse. The values are filled in from
# site-wide globals and from a user's preferences by initialiseFromUser().
# Each setting has a getter and a setter, listed pairwise below.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Getters return the stored member; setters assign through wfSetVar()
	# (wfSetRef() for the skin) and hand back that helper's return value.

	function getUseTeX() {
		return $this->mUseTeX;
	}
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}
	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function getSkin() {
		return $this->mSkin;
	}
	function setSkin( $x ) {
		return wfSetRef( $this->mSkin, $x );
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function getEditSection() {
		return $this->mEditSection;
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function getShowToc() {
		return $this->mShowToc;
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# Factory: build a ParserOptions object and initialise it from $user.
	/* static */ function newFromUser( &$user )
	{
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Populate every member.
	# Site-wide switches are copied from the $wg* globals; the skin and
	# the remaining preferences are read from the user object.
	# $userInput - user to read preferences from; when it evaluates to
	#              false a fresh User object is created and marked as
	#              loaded via setLoaded( true ), and that one is used instead
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$who =& $userInput;
		} else {
			$who = new User;
			$who->setLoaded( true );
		}

		# Site configuration
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user preferences
		$this->mSkin =& $who->getSkin();
		$this->mDateFormat = $who->getOption( "date" );
		$this->mEditSection = $who->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $who->getOption( "editsectiononrightclick" );
		$this->mNumberHeadings = $who->getOption( "numberheadings" );
		$this->mShowToc = $who->getOption( "showtoc" );
	}

}
2082
2083 # Regex callbacks, used in Parser::replaceVariables
# Regex callback that pre-expands inner {{...}} inclusions before handing
# the match over to the current parser's braceSubstitution().
#
# $matches - regex match array; element 2 is scanned for inner
#            inclusions (presumably the text between the outermost
#            braces - confirm against the pattern used by the caller)
# Returns the expanded string for the "several includes in a row" case,
# otherwise whatever $wgCurParser->braceSubstitution() returns.
function wfBraceSubstitution( $matches )
{
	global $wgCurParser;
	$titleChars = Title::legalChars();

	# not really nested stuff, just multiple includes separated by titlechars
	if ( preg_match( "/^([^}{]*)}}([^}{]*{{)(.*)$/s", $matches[2], $head ) ) {
		# Expand the first include and glue the untouched remainder back on
		$string = wfInternalBraceSubstitution( $head[1] ) . $head[2] . $head[3];

		# Keep expanding the leftmost remaining include until none matches
		while ( preg_match( "/^([^}{]*){{([$titleChars]*?)(}}[^}{]*{{.*)?$/s", $string, $step ) ) {
			$replacement = wfInternalBraceSubstitution( $step[2] );
			if ( empty( $step[3] ) ) {
				$trail = '';
			} else {
				# Drop the closing braces that belonged to the include just expanded
				$trail = preg_replace( "/^}}/", '', $step[3] );
			}
			$string = $step[1] . $replacement . $trail;
		}
		return $string;
	}

	# Double brace substitution, expand bar in {{foo{{bar}}}}
	# At most 30 inner expansions are performed per call.
	for ( $rounds = 0; preg_match( "/{{([$titleChars]*?)}}/", $matches[2], $inner ) && $rounds < 30; $rounds++ ) {
		$replacement = wfInternalBraceSubstitution( $inner[1] );
		# Apply the same replacement to the full match and to the inner text
		foreach ( array( 0, 2 ) as $slot ) {
			$matches[$slot] = str_replace( $inner[0], $replacement, $matches[$slot] );
		}
	}

	return $wgCurParser->braceSubstitution( $matches );
}
2112
# Regex callback: forwards the match array to argSubstitution() on the
# parser held in the global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2118
2119 # XXX: i don't think this is the most elegant way to do it..
# Expand a single inner inclusion on behalf of wfBraceSubstitution().
#
# $part1 - the inclusion text that wfBraceSubstitution() captured, without
#          the surrounding braces. Magic-word prefixes are taken off it
#          via MagicWord::matchStartAndRemove() as the checks proceed
#          (presumably that method edits its argument in place - confirm
#          in MagicWord).
#
# The checks run in this order, the first hit wins:
#   SUBST, MSG/MSGNW/INT, NS, LOCALURL/LOCALURLE, parser variables,
#   template text loaded from the database.
#
# Returns the replacement text, or an empty string when the inclusion is
# not expanded.
#
# NOTE(review): this function used to read $matches[0] for the "leave it
# alone" result, but no $matches exists in this scope, so the value was
# always null (plus an undefined-variable notice). The empty result is now
# explicit. Returning the literal "{{...}}" text instead is NOT safe
# without reworking wfBraceSubstitution(): its first loop re-scans the
# string it has built and would match the same inclusion again forever.
function wfInternalBraceSubstitution( $part1 ) {
	global $wgLang, $wgCurParser;

	# Result used on every path that does not expand the inclusion
	$unexpanded = '';
	$found = false;

	# SUBST
	$mwSubst =& MagicWord::get( MAG_SUBST );
	if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
		if ( $wgCurParser->mOutputType != OT_WIKI ) {
			# Invalid SUBST not replaced at PST time
			# Return without further processing
			$text = $unexpanded;
			$found = true;
		}
	} elseif ( $wgCurParser->mOutputType == OT_WIKI ) {
		# SUBST not found in PST pass, do nothing
		$text = $unexpanded;
		$found = true;
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Take off a MSGNW: prefix if present. No nowiki handling follows
		# from it in this function; the prefix is only removed.
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( !$mwMsgnw->matchStartAndRemove( $part1 ) ) {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			# Counted like any other inclusion to enforce the repeat limit
			if ( $wgCurParser->incrementIncludeCount( "int:$part1" ) ) {
				$text = wfMsgReal( $part1, array(), true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				# Non-zero numeric argument: use it as the namespace index
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				# Otherwise look the argument up as a canonical namespace name
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		# Pick the Title method that produces the requested URL form
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				$text = $title->$func();
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $wgCurParser->mVariables ) ) {
		$text = $wgCurParser->mVariables[$part1];
		$found = true;
		$wgCurParser->mOutput->mContainsOldMagic = true;
	}

	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $wgCurParser->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $wgCurParser->mOutputType == OT_HTML && !$found ) {
				$text = "[[" . $title->getPrefixedText() . "]]";
				$found = true;
			}
		}
	}

	return $found ? $text : $unexpanded;
}
2250 ?>