More uninitialized variable cleanup && 'pure' register_globals cleanup...
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24 var $mContainsOldMagic = 0;
25
26 # Temporary:
27 var $mOptions, $mTitle;
28
# Constructor (PHP 4 style: a method named after the class).
# Brings a freshly created parser into its initial state by delegating
# to clearState().
function Parser()
{
	$this->clearState();
}
33
# Resets every member listed under "Cleared with clearState()" so that the
# same Parser instance can be reused for another parse() run.
function clearState()
{
	# Scalar bookkeeping first ...
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;

	# ... then a brand new output container.
	$this->mOutput = new ParserOutput;
}
42
43 # First pass--just handle <nowiki> sections, pass the rest off
44 # to doWikiPass2() which does all the real work.
45 #
46 # Returns a ParserOutput
47 #
# Entry point: converts wikitext to HTML.
#
# $text       - wikitext to convert
# $title      - title object of the page being parsed (kept by reference
#               in $this->mTitle)
# $options    - options object, stored in $this->mOptions and queried by
#               the individual passes
# $linestart  - handed through to doWikiPass2()
# $clearState - when true, clearState() is called before parsing
#
# Returns $this->mOutput after its text has been set to the rendered HTML.
#
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	# strip() fills $this->mStripState by reference; unstrip() needs the
	# very same state to put the protected sections back.
	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
69
# Builds a random hexadecimal string from two independent mt_rand() draws
# in the range 0..0x7fffffff, each rendered with dechex() and concatenated
# (2 to 16 hex digits in total). strip() uses it for placeholder prefixes.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
74
75 # Strips <nowiki>, <pre> and <math>
76 # Returns the text, and fills an array with data needed in unstrip()
77 #
# Replaces every <nowiki>, <math> (only if the options enable TeX) and <pre>
# section with a unique placeholder and records the section contents in
# $state so that unstrip() can restore them later.
#
# $text   - wikitext to process
# $state  - (out) array holding, per tag kind, the placeholder prefix
#           ('..unq'), the number of sections found ('..secs') and the
#           stored replacement texts ('..list', indexed from 1)
# $render - true: store the rendered form of each section;
#           false: store the section wrapped in its original tag
#
# Returns the text with all sections replaced by placeholders.
#
# An opening tag without a matching closing tag swallows the rest of the
# text as section content; the remainder is then the empty string.
#
function strip( $text, &$state, $render = true )
{
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	$stripped = "";
	$stripped2 = "";
	$stripped3 = "";

	# Replace any instances of the placeholders that already occur in the
	# input, so that they cannot be mistaken for real ones later.
	$text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
	$text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
	$text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

	# Pass 1: <nowiki>
	while ( "" != $text ) {
		$p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
			++$state['nwsecs'];

			if ( $render ) {
				$state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
			} else {
				$state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
			}

			$stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
			# $q[1] does not exist when the closing tag is missing.
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}

	# Pass 2: <math>, only when TeX support is switched on
	if( $this->mOptions->getUseTeX() ) {
		while ( "" != $stripped ) {
			$p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
			$stripped2 .= $p[0];
			if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
				$stripped = "";
			} else {
				$q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
				++$state['mathsecs'];

				if ( $render ) {
					$state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
				} else {
					$state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
				}

				$stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
				$stripped = isset( $q[1] ) ? $q[1] : "";
			}
		}
	} else {
		$stripped2 = $stripped;
	}

	# Pass 3: <pre>
	while ( "" != $stripped2 ) {
		$p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
		$stripped3 .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$stripped2 = "";
		} else {
			$q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
			++$state['presecs'];

			if ( $render ) {
				$state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
			} else {
				$state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
			}

			$stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
			$stripped2 = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped3;
}
166
# Reverses strip(): every placeholder (prefix followed by the section number
# as eight upper-case hex digits) is replaced by the text stored for it.
#
# $text  - text containing placeholders
# $state - the state array that strip() filled in
#
# Sections are restored in the order <pre>, <math>, <nowiki>.
# Returns the text with all placeholders replaced.
#
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		$n = 1;
		while ( $n <= $state[$kind . 'secs'] ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
			++$n;
		}
	}
	return $text;
}
182
# Builds the HTML block listing sub-categories and articles for the page
# being parsed, if that page is a category page.
#
# Returns "" when the options disable category magic or when the part of
# the title text before the first ":" is not the (ucfirst'ed) "category"
# message; otherwise returns the HTML fragment.
#
function categoryMagic ()
{
	global $wgLang , $wgUser ;
	# Always hand back a string; doWikiPass2() appends the result to the text.
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	# The title may consist of the category prefix only (no ":" part).
	$catname = isset ( $ti[1] ) ? $ti[1] : "" ;
	$r = "<br break=all>\n" ;

	$articles = array() ;
	$parents = array () ;
	$children = array() ;

	# $sk =& $this->mGetSkin();
	$sk =& $wgUser->getSkin() ;

	# Hard-wired switch between the two link tables; only the "brokenlinks"
	# variant is active at the moment.
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		if ( $doesexist ) {
			$t = $x->l_from ;
		} else {
			# Rebuild the prefixed title from namespace and title text
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;
		}

		# Pages whose own prefix is the category prefix are sub-categories
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $catname );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
252
# Returns the whitelist of HTML attribute names that survive sanitising.
# Nothing that allows scripting is on the list. ("width" appears twice,
# which is harmless for the in_array() lookups done on it.)
function getHTMLattrs ()
{
	return array(
		# Generic
		"title", "align", "lang", "dir", "width", "height",
		# BR, HR
		"bgcolor", "clear", "noshade",
		# BLOCKQUOTE, Q, FONT
		"cite", "size", "face", "color",
		# For various lists, mostly deprecated but safe
		"type", "start", "value", "compact",
		# Tables
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		# For CSS
		"id", "class", "name", "style"
	);
}
269
# Sanitises the attribute part of an HTML tag.
#
# $t - raw attribute text (everything between the tag name and ">")
#
# Every attribute whose lower-cased name is not in getHTMLattrs() is
# removed. If a remaining style attribute looks like it carries a script
# or a URL, ALL attributes are dropped. Returns the trimmed result.
#
# The per-attribute filtering is done through a callback instead of the
# /e (eval) regex modifier: with /e the attribute value, which comes from
# user input, was substituted into PHP code before evaluation.
#
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag
	$t = preg_replace_callback(
		"/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/",
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}

# Callback for fixTagAttributes(): decides the fate of a single attribute.
#
# $m - regex match; $m[1] is the attribute name, $m[3] (if present) its value
#
# Returns 'name=value' or 'name' for whitelisted attributes, "" otherwise.
#
/* private */ function fixTagAttributesCallback( $m )
{
	if ( !in_array( strtolower( $m[1] ), $this->getHTMLattrs() ) ) {
		return "" ;
	}
	# Group 3 is absent from the match array for value-less attributes
	$value = isset( $m[3] ) ? $m[3] : "" ;
	if ( $value !== "" ) {
		return $m[1] . "=" . $value ;
	}
	return $m[1] ;
}
292
# Converts wiki table markup into HTML tables, line by line:
#   {| attrs     opens a table
#   |}           closes the innermost table
#   |- attrs     starts a new row (any number of dashes)
#   |+ text      caption
#   | a || b     data cells;  ! a !! b  header cells
# A cell may be written as "attrs | content".
#
# $t - text to process
#
# Nested tables are supported through four parallel stacks (one entry per
# open table). Tables still open at the end of the text are closed.
# Returns the converted text.
#
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Take everything after the two marker characters;
			# fixTagAttributes() trims, so "{|border=1" and "{| border=1"
			# are both handled.
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is emitted lazily with the first cell; only
			# remember its attributes here.
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Open the row if that has not happened yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell, then open the new one
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table. All four stacks are popped together and
	# the cell is closed with the tag it was opened with (TD, TH or CAPTION).
	while ( count ( $td ) > 0 )
	{
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
392
393 # Well, OK, it's actually about 14 passes. But since all the
394 # hard lifting is done inside PHP's regex code, it probably
395 # wouldn't speed things up much to add a real parser.
396 #
# Runs the individual conversion passes over text that has already been
# through strip(). The order of the passes matters.
#
# $text      - wikitext with placeholders
# $linestart - handed through to doBlockLevels()
#
# Returns the converted text, with the output of categoryMagic() appended.
#
function doWikiPass2( $text, $linestart )
{
	global $wgDateFormatter;

	$fname = "OutputPage::doWikiPass2";
	wfProfileIn( $fname );

	# HTML clean-up and magic variables
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text );

	# Normalise the case of explicit rules
	$text = str_replace ( "<HR>", "<hr>", $text );

	# Block structure
	$text = $this->doHeadings( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	# Optional date reformatting
	$dynamicDates = $this->mOptions->getUseDynamicDates();
	if ( $dynamicDates ) {
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}

	# Links (external ones first) and tables
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	# Skin specific post-processing, then the category listing
	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
429
430
# Turns "== Heading ==" style lines into <hN> elements.
#
# Levels are processed from six equal signs down to one. A heading has to
# start at the beginning of a line, must not contain "=" in its text, and
# the closing run must be followed by whitespace or the end of the line.
#
/* private */ function doHeadings( $text )
{
	$level = 6;
	while ( $level >= 1 ) {
		$marks = str_repeat( "=", $level );
		$pattern = "/^{$marks}([^=]+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
		--$level;
	}
	return $text;
}
440
441 # Note: we have to do external links before the internal ones,
442 # and otherwise take great care in the order of things here, so
443 # that we don't end up interpreting some URLs twice.
444
# Converts external links for every supported protocol, one protocol at a
# time. The second argument handed to subReplaceExternalLinks() is true
# for http and https only (it switches on auto-numbering and, if the
# options allow it, inline images).
#
/* private */ function replaceExternalLinks( $text )
{
	$fname = "OutputPage::replaceExternalLinks";
	wfProfileIn( $fname );

	# protocol => autonumber flag, in processing order
	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
459
# Converts the external links of ONE protocol.
#
# $s          - text to process
# $protocol   - protocol name without the colon, e.g. "http"
# $autonumber - true: bracketed links without a description are shown as
#               "[n]"; together with the "allow external images" option it
#               also enables inline images for bare image URLs
#
# Step 1 handles bare URLs, step 2 the bracketed forms "[url]" and
# "[url description]". Returns the converted text.
#
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Temporary stand-in for the protocol name, so that URLs converted in
	# step 1 are not matched a second time.
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$imagePattern = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\.((?i){$images})([^{$uc}]|$)/";
	$barePattern = "/(^|[^\\[])({$protocol}:)(([{$uc}]|[{$sep}][{$uc}])+)([^{$uc}{$sep}]|[{$sep}]|$)/";
	$sk =& $this->mOptions->getSkin();

	# Step 1a: bare image URLs become inline images
	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$imageHtml = $sk->makeImage( "{$unique}:\\3" . "/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $imagePattern, "\\1" . $imageHtml . "\\6", $s );
	}

	# Step 1b: all other bare URLs become links showing the URL itself
	$maskedUrl = "{$unique}:\\3";
	$maskedAttrs = $sk->getExternalLinkAttributes( $maskedUrl, wfEscapeHTML( $maskedUrl ) );
	$bareReplacement = "\\1" . "<a href=\"{$maskedUrl}\"" . $maskedAttrs . ">" .
		wfEscapeHTML( $maskedUrl ) . "</a>\\5";
	$s = preg_replace( $barePattern, $bareReplacement, $s );
	$s = str_replace( $unique, $protocol, $s );

	# Step 2: bracketed links. Split at every "[protocol:"; the leading
	# blank guarantees a first piece that is not the start of a link.
	$pieces = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $pieces );
	$s = substr( $s, 1 );

	$plainPattern = "/^([{$uc}{$sep}]+)](.*)\$/sD";
	$describedPattern = "/^([{$uc}{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $pieces as $line ) {
		if ( preg_match( $plainPattern, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) {
				$text = "[" . ++$this->mAutonumber . "]";
			} else {
				$text = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $describedPattern, $line, $m ) ) {
			# [url description]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a well-formed link: put the piece back untouched
			$s .= "[{$protocol}:" . $line;
			continue;
		}

		if ( $this->mOptions->getPrintable() ) {
			$paren = " (<i>" . htmlspecialchars ( $link ) . "</i>)";
		} else {
			$paren = "";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";
	}
	return $s;
}
522
# Handles a ''' token: toggles bold.
#
# $state - (in/out) array with the keys "em" and "strong"; each holds
#          FALSE or the token position at which the element was opened
# $token - current token; only $token["pos"] is read
#
# Returns the HTML to emit. If an <em> was opened after the <strong>
# that is being closed, the <em> is closed and re-opened around it so
# that the tags stay properly nested.
# NOTE(review): a "pos" of 0 would be indistinguishable from FALSE here -
# confirm that the tokenizer never reports such a position.
#
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# ''' lala ''lala '''
	$emOpenedInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;

	return $emOpenedInside ? "</em></strong><em>" : "</strong>";
}
540
# Handles a '' token: toggles emphasis. Mirror image of handle3Quotes().
#
# $state - (in/out) array with the keys "em" and "strong"; each holds
#          FALSE or the token position at which the element was opened
# $token - current token; only $token["pos"] is read
#
# Returns the HTML to emit. If a <strong> was opened after the <em>
# that is being closed, the <strong> is closed and re-opened around it.
#
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# ''lala'''lala'' ....'''
	$strongOpenedInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;

	return $strongOpenedInside ? "</strong></em><strong>" : "</em>";
}
558
# Handles a ''''' token: toggles bold and emphasis at the same time.
#
# $state - (in/out) array with the keys "em" and "strong"; each holds
#          FALSE or the token position at which the element was opened
# $token - current token; only $token["pos"] is read
#
# Returns the HTML to emit:
#   both open    -> close both, the one opened last first
#   only em      -> close em, open strong
#   only strong  -> close strong, open em
#   none open    -> open both
#
/* private */ function handle5Quotes( &$state, $token )
{
	# Plain assignments: there is nothing to append to yet.
	if ( $state["em"] && $state["strong"] ) {
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
582
# Token driven pass that handles [[internal links]], quote markup
# ('' ''' '''''), horizontal rules and the RFC / ISBN magic words.
#
# $str - text to process
#
# While a [[ is open, everything is collected on a stack instead of being
# emitted; at the matching ]] the collected text is glued together and
# handed to handleInternalLink(). Returns the converted text.
#
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$tokenStack = array();

	$s="";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$tagIsOpen = FALSE;

	# The tokenizer splits the text into tokens and returns them one by one.
	# Every call to the tokenizer returns a new token.
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# simple text with no further markup
				$txt = $token["text"];
				break;
			case "[[":
				# link opening tag.
				# FIXME : Treat orphaned open tags (stack not empty when text is over)
				$tagIsOpen = TRUE;
				array_push( $tokenStack, $token );
				$txt="";
				break;
			case "]]":
				# link close tag.
				# get text from stack, glue it together, and call the code to handle a
				# link
				if ( count( $tokenStack ) == 0 )
				{
					# stack empty. Found a ]] without an opening [[
					$txt = "]]";
				} else {
					$linkText = "";
					$lastToken = array_pop( $tokenStack );
					while ( $lastToken["type"] != "[[" )
					{
						# isset(), not !empty(): a text of "0" is valid
						# link content and must not be dropped.
						if( isset( $lastToken["text"] ) ) {
							$linkText = $lastToken["text"] . $linkText;
						}
						$lastToken = array_pop( $tokenStack );
					}
					$txt = $linkText ."]]";
					if( isset( $lastToken["text"] ) ) {
						$prefix = $lastToken["text"];
					} else {
						$prefix = "";
					}
					$nextToken = $tokenizer->previewToken();
					if ( $nextToken["type"] == "text" )
					{
						# Preview just looks at it. Now we have to fetch it.
						$nextToken = $tokenizer->nextToken();
						$txt .= $nextToken["text"];
					}
					$txt = $this->handleInternalLink( $txt, $prefix );
				}
				$tagIsOpen = (count( $tokenStack ) != 0);
				break;
			case "----":
				$txt = "\n<hr>\n";
				break;
			case "'''":
				# This and the three next ones handle quotes
				$txt = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$txt = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$txt = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$txt="";
				break;
			case "RFC ":
				if ( $tagIsOpen ) {
					$txt = "RFC ";
				} else {
					$txt = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				if ( $tagIsOpen ) {
					$txt = "ISBN ";
				} else {
					$txt = $this->doMagicISBN( $tokenizer );
				}
				break;
			default:
				# Call language specific Hook.
				$txt = $wgLang->processToken( $token, $tokenStack );
				if ( NULL == $txt ) {
					# An unknown token. Highlight.
					$unknownText = isset( $token["text"] ) ? $token["text"] : "";
					$txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$txt .= "<font color=\"#FFFF00\"><b>".$unknownText."</b></font>";
				}
				break;
		}
		# If we're parsing the interior of a link, don't append the interior to $s,
		# but push it to the stack so it can be processed when a ]] token is found.
		if ( $tagIsOpen && $txt != "" ) {
			$token["type"] = "text";
			$token["text"] = $txt;
			array_push( $tokenStack, $token );
		} else {
			$s .= $txt;
		}
	} #end while
	if ( count( $tokenStack ) != 0 )
	{
		# still objects on stack. opened [[ tag without closing ]] tag.
		# Emit everything that was collected as it is.
		$txt = "";
		while ( $lastToken = array_pop( $tokenStack ) )
		{
			if ( $lastToken["type"] == "text" )
			{
				$txt = $lastToken["text"] . $txt;
			} else {
				$txt = $lastToken["type"] . $txt;
			}
		}
		$s .= $txt;
	}
	return $s;
}
715
# Converts ONE internal link into HTML.
#
# $line   - the text following the opening "[[": link target, optional
#           "|alternate text", the closing "]]" and whatever trails it
# $prefix - text belonging to the opening token, emitted in front of the
#           link
#
# Returns the HTML for the link. Input that does not have the expected
# form is returned unchanged (with the "[[" put back in front).
#
# Every exit path closes the profiling section opened at the top.
#
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	# Without this declaration the interwiki test below always saw an
	# undefined local variable, so language links were never recognised.
	global $wgInterwikiMagic;
	static $fname = "OutputPage::replaceInternalLinks" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	wfProfileOut( "$fname-setup" );
	$s = "";

	if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
		$text = $m[2];
		$trail = $m[3];
	} else { # Invalid form; output directly
		$s .= $prefix . "[[" . $line ;
		wfProfileOut( $fname );
		return $s;
	}

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# !empty() also covers namespaces that have no entry at all
		if( !empty( $wgNamespacesWithSubpages[$this->mTitle->getNamespace()] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		$s .= $prefix . "[[" . $line;
		wfProfileOut( $fname );
		return $s;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		# Language link: remembered in the output object, not shown inline
		if( $iw && $wgInterwikiMagic && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			$s .= $prefix . $trail;
			wfProfileOut( $fname );
			return $s;
		}
		if( $ns == $image ) {
			$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# Link to the page itself: show bold text instead of a link, unless
	# the link carries a "#" part. strpos() returns 0 for a "#" in first
	# position, hence the strict comparison.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
		$s .= $prefix . "<strong>" . $text . "</strong>" . $trail;
		wfProfileOut( $fname );
		return $s;
	}
	# NOTE(review): $ns comes from getNamespace() while $category is a
	# message text; confirm that comparing the two is what is intended.
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		# $t = $sk->makeKnownLink( $category.":".$t, $t, "", $trail , $prefix );
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		$this->mCategoryLinks[] = $t ;
		$s .= $prefix . $trail ;
		wfProfileOut( $fname );
		return $s ;
	}
	if( $ns == $media ) {
		$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
841
842 # Some functions here used by doBlockLevels()
843 #
# Ends the section recorded in $this->mLastSection.
#
# Returns the closing tag for that section followed by a newline. No tag
# is produced for "p" or when no section is open; the newline is always
# returned. $this->mLastSection is reset to "" in every case.
#
/* private */ function closeParagraph()
{
	$section = $this->mLastSection;
	$this->mLastSection = "";

	$needsNoTag = ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) );
	if ( $needsNoTag ) {
		return "\n";
	}
	return "</" . $section . ">" . "\n";
}
854 # getCommon() returns the length of the longest common substring
855 # of both arguments, starting at the beginning of both.
856 #
# Returns the length of the common prefix of $st1 and $st2, i.e. the number
# of leading characters (bytes) in which both strings agree.
#
# Uses square-bracket string offsets; the curly-brace form is deprecated
# as of PHP 7.4 and no longer parses in PHP 8.
#
/* private */ function getCommon( $st1, $st2 )
{
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
868 # These next three functions open, continue, and close the list
869 # element appropriate to the prefix character passed into them.
870 #
# Opens the list element that belongs to the prefix character $char.
#
# Returns the result of closeParagraph() followed by the opening tags of
# the list and of its first item. ";" additionally sets $this->mDTopen.
# For any other character only an error comment is returned (the
# paragraph is closed nevertheless).
#
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			$this->mDTopen = true;
			break;
		default:
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
886
# Closes the current list item and opens the next one for the prefix
# character $char.
#
# For definition lists the closing tag depends on whether a <dt> is
# currently open ($this->mDTopen); the flag is then updated to tell
# whether the item just opened is a <dt> (";") or a <dd> (":").
# Returns an error comment for unknown characters.
#
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";
		case ":":
		case ";":
			$close = "</dd>";
			if ( $this->mDTopen ) { $close = "</dt>"; }
			$opensTerm = ( ";" == $char );
			$this->mDTopen = $opensTerm;
			return $close . ( $opensTerm ? "<dt>" : "<dd>" );
	}
	return "<!-- ERR 2 -->";
}
903
# Closes the list element that belongs to the prefix character $char.
#
# Returns the closing tags followed by a newline. For ":" the item tag
# depends on $this->mDTopen, which is cleared when it was set. Unknown
# characters (this includes ";") yield an error comment without newline.
#
/* private */ function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text."\n";
}
919
# Line based pass that creates the block structure: paragraphs, <pre>
# sections (lines starting with a blank) and lists (lines starting with
# any combination of * # : ;).
#
# $text      - text to process
# $linestart - false: the first line is copied to the output as it is
#              and is not examined for block markup
#
# Returns the converted text.
#
# String offsets are read with substr() (the curly-brace offset syntax no
# longer parses in PHP 8, and substr() is also safe on empty lines).
#
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "OutputPage::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;

	# Read after the loop; give them a value in case no line is iterated.
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same list nesting as the previous line: just a new item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on one line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Nesting changed: close what is no longer open ...
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( substr( $lastPref, $opl - 1, 1 ) );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( substr( $pref, $cpl - 1, 1 ) );
			}
			# ... and open what is new
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
				"/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				if ( " " === substr( $t, 0, 1 ) ) {
					$newSection = "pre";
					# $t = wfEscapeHTML( $t );
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
					$newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
				preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists the last line left open
	while ( $npl ) {
		$text .= $this->closeList( substr( $pref2, $npl - 1, 1 ) );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1025
# Replaces magic variables in the text: the date and time variables, the
# number of articles, and the parameterised MSG / MSGNW forms.
#
# $text - text to process
#
# Side effects: adds to $this->mContainsOldMagic and
# $this->mContainsNewMagic, and stores this parser in the global
# $wgCurOut before the MSG callbacks run.
# Returns the text with the variables replaced.
#
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "OutputPage::replaceVariables";
	wfProfileIn( $fname );

	# The class does not declare mContainsNewMagic; make sure the counter
	# exists before it is incremented below.
	if ( !isset( $this->mContainsNewMagic ) ) {
		$this->mContainsNewMagic = 0;
	}

	$magic = array();

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!

	$magic[MAG_CURRENTMONTH] = date( "m" );
	$magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date("n") );
	$magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date("n") );
	$magic[MAG_CURRENTDAY] = date("j");
	# date("w") counts from 0, hence the +1 handed to getWeekdayName()
	$magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date("w")+1 );
	$magic[MAG_CURRENTYEAR] = date( "Y" );
	$magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );

	$this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$v = wfNumberOfArticles();
		$text = $mw->replace( $v, $text );
		if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$mw =& MagicWord::get( MAG_MSG );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	$mw =& MagicWord::get( MAG_MSGNW );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	wfProfileOut( $fname );
	return $text;
}
1070
# Cleans up HTML, removes dangerous tags and attributes
#
# The text is split at every "<". A piece that parses as a whitelisted tag
# and is acceptable in the current nesting context is kept, with its
# attributes passed through fixTagAttributes(). Every other piece is
# neutralised by emitting "&lt;" and turning its ">" into "&gt;".
# HTML comments are removed, and tags still open at the end are closed.
#
# $text: HTML/wikitext to sanitise
# Returns the sanitised text.
/* private */ function removeHTMLtags( $text )
{
	$fname = "Parser::removeHTMLtags";
	wfProfileIn( $fname );
	$htmlpairs = array( # Tags that must be closed
		"b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		"h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
		"strike", "strong", "tt", "var", "div", "center",
		"blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		"ruby", "rt" , "rb" , "rp"
	);
	$htmlsingle = array(
		"br", "p", "hr", "li", "dt", "dd"
	);
	$htmlnest = array( # Tags that can be nested--??
		"table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		"dl", "font", "big", "small", "sub", "sup"
	);
	$tabletags = array( # Can only appear inside table
		"td", "th", "tr"
	);

	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): $htmlattrs is not read anywhere in this function; the call
	# is kept in case getHTMLattrs() has side effects -- confirm and drop.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	$text = preg_replace( "/<!--.*-->/sU", "", $text );

	$bits = explode( "<", $text );
	$text = array_shift( $bits );
	# $tagstack holds the open tags of the current table scope;
	# $tablestack holds the saved $tagstack of each enclosing scope.
	$tagstack = array(); $tablestack = array();

	foreach ( $bits as $x ) {
		# Groups: 1 = "/" for a closing tag, 2 = tag name, 3 = attributes,
		# 4 = the closing ">" (optionally "/>"), 5 = text up to the next "<".
		# Pieces that do not match fall through and are escaped below.
		if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", $x, $regs ) ) {
			$slash = $regs[1];
			$t = strtolower( $regs[2] );
			$params = $regs[3];
			$brace = $regs[4];
			$rest = $regs[5];

			if ( in_array( $t, $htmlelements ) ) {
				$badtag = 0;
				$newparams = "";
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					if ( ! in_array( $t, $htmlsingle ) ) {
						$ot = array_pop( $tagstack );
						if ( $ot != $t ) {
							# Mismatched close: restore the stack. An empty
							# stack pops NULL, which must not be pushed back.
							if ( ! is_null( $ot ) ) {
								array_push( $tagstack, $ot );
							}
							$badtag = 1;
						}
					}
					if ( ! $badtag && $t == "table" ) {
						# Leaving a table: resume the enclosing scope's stack
						$tagstack = array_pop( $tablestack );
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
					  ! in_array( "table", $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
					  ! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == "table" ) {
							# Entering a table starts a fresh stack scope
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);
				}
				if ( ! $badtag ) {
					$rest = str_replace( ">", "&gt;", $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
		}
		# Not an acceptable tag: show it literally
		$text .= "&lt;" . str_replace( ">", "&gt;", $x);
	}
	# Close off any remaining tags
	while ( $t = array_pop( $tagstack ) ) {
		$text .= "</$t>\n";
		if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
	}
	wfProfileOut( $fname );
	return $text;
}
1163
/*
 * Rewrites the <h1>..<h6> headings found in $text.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections when the title is editable and the
 *    option is enabled (suppressed by the MAG_NOEDITSECTION magic word)
 * 3) Add a table of contents after the first block of text when the option
 *    is enabled and there are more than three headings (suppressed by
 *    MAG_NOTOC and on the page named by the "mainpage" message)
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * $text: HTML to process
 * Returns the HTML with the headings replaced by their formatted versions.
 */
/* private */ function formatHeadings( $text )
{
	$nh = $this->mOptions->getNumberHeadings();
	$st = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$es = 0;
		$esr = 0;
	} else {
		$es = $this->mOptions->getEditSection();
		$esr = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if ( $esw->matchAndRemove( $text ) ) {
		$es = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if ( $mw->matchAndRemove( $text ) ) {
		$st = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) { $st = 0; }

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links
	# $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading text
	preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

	# headline counter
	$c = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = "";
	$full = "";
	$head = array();      # formatted replacement for each headline
	$level = 0;           # level of the current headline (0 = none seen yet)
	$prevlevel = 0;       # level of the previous headline
	$h = array();         # per-level headline counters
	$refers = array();    # anchor text => number of times it has been used
	$refcount = array();  # headline index => occurrence number of its anchor
	$toclines = 0;

	foreach( $matches[3] as $headline ) {
		$numbering = "";
		$dot = 0;
		$head[$c] = "";

		if( $level ) { $prevlevel = $level; }
		$level = (int)$matches[1][$c];
		if( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
			$h[$level] = 0; // reset when we enter a new level
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $nh || $st ) && $level < $prevlevel ) {
			$h[$level+1] = 0; // reset when we step back a level
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		if( !isset( $h[$level] ) ) { $h[$level] = 0; }
		$h[$level]++; // count number of headlines for each level

		if( $nh || $st ) {
			// Build the dotted number from the non-zero counters up to this level
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $h[$i] ) ) {
					if( $dot ) { $numbering .= "."; }
					$numbering .= $h[$i];
					$dot = 1;
				}
			}
		}

		// The canonized header is a version of the header text safe to use for links
		// Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
		$tocline = trim( $canonized_headline );
		$canonized_headline = str_replace( '"', "", $canonized_headline );
		$canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );
		// count how many in assoc. array so we can track dupes in anchors
		if( !isset( $refers[$canonized_headline] ) ) { $refers[$canonized_headline] = 0; }
		$refers[$canonized_headline]++;
		$refcount[$c] = $refers[$canonized_headline];

		// Prepend the number to the heading text

		if( $nh || $st ) {
			$tocline = $numbering . " " . $tocline;

			// Don't number the heading if it is the only one (looks silly)
			if( $nh && count( $matches[3] ) > 1 ) {
				$headline = $numbering . " " . $headline; // the two are different if the line contains a link
			}
		}

		// Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if( $refcount[$c] > 1 ) { $anchor .= "_" . $refcount[$c]; }
		if( $st ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}
		if( $es ) {
			$head[$c] .= $sk->editSectionLink( $c + 1 );
		}

		// Put it all together

		$head[$c] .= "<h" . $level . $matches[2][$c]
			. "<a name=\"" . $anchor . "\">"
			. $headline
			. "</a>"
			. "</h" . $level . ">";

		// Add the edit section link

		if( $esr ) {
			$head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
		}

		$c++;
	}

	if( $st ) {
		$toclines = $c;
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	// split up and insert constructed headlines

	$blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( ( $es ) && $c > 0 && $i == 0 ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled
			$full .= $sk->editSectionLink( 0 );
		}
		$full .= $block;
		if( $st && $toclines > 3 && !$i ) {
			# Let's add a top anchor just in case we want to link to the top of the page
			$full = "<a name=\"top\"></a>" . $full . $toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
1331
# Handles the text that follows an "ISBN" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed, and a leading run of ISBN characters (digits, "-", A-Z)
# becomes a link to the "Booksources" special page. Otherwise the text is
# returned unchanged behind a literal "ISBN".
#
# $tokenizer: object providing previewToken() and nextToken(), each returning
#             an array with a "type" key (and "text" for text tokens)
# Returns the replacement text for the magic word and the consumed token.
/* private */ function doMagicISBN( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# Special::BookSources link
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "ISBN ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	# Split $x into leading blanks, the ISBN characters, and the remainder.
	# strspn() copes with the string running out, unlike indexing $x{0}.
	# The casts cover substr() returning false at the end of the string.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$isbnLen = strspn( $x, $valid );
	$isbn = (string)substr( $x, 0, $isbnLen );
	$x = (string)substr( $x, $isbnLen );

	$num = str_replace( "-", "", $isbn );
	$num = str_replace( " ", "", $num );

	if ( "" === $num ) {
		# Nothing usable (at most a run of hyphens): put the text back
		# untouched, including any hyphens that were consumed above.
		$text = "ISBN $blank$isbn$x";
	} else {
		$titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
		$text = "<a href=\"" .
			$titleObj->escapeLocalUrl( "isbn={$num}" ) .
			"\" class=\"internal\">ISBN $isbn</a>";
		$text .= $x;
	}
	return $text;
}
# Handles the text that follows an "RFC" magic word.
#
# Tokens with an empty type are skipped. If the next token is a text token
# it is consumed, and a leading run of digits becomes an external link whose
# URL is the "rfcurl" message with "$1" replaced by the number. Otherwise the
# text is returned unchanged behind a literal "RFC".
#
# $tokenizer: object providing previewToken() and nextToken(), each returning
#             an array with a "type" key (and "text" for text tokens)
# Returns the replacement text for the magic word and the consumed token.
/* private */ function doMagicRFC( &$tokenizer )
{
	# Check whether next token is a text token
	# If yes, fetch it and convert the text into a
	# link to an RFC source
	$token = $tokenizer->previewToken();
	while ( $token["type"] == "" )
	{
		$tokenizer->nextToken();
		$token = $tokenizer->previewToken();
	}
	if ( $token["type"] != "text" ) {
		return "RFC ";
	}

	$token = $tokenizer->nextToken();
	$x = (string)$token["text"];
	$valid = "0123456789";

	# Split $x into leading blanks, the RFC number, and the remainder.
	# strspn() copes with the string running out, unlike indexing $x{0}.
	# The casts cover substr() returning false at the end of the string.
	$blankLen = strspn( $x, " " );
	$blank = str_repeat( " ", $blankLen );
	$x = (string)substr( $x, $blankLen );

	$rfcLen = strspn( $x, $valid );
	$rfc = (string)substr( $x, 0, $rfcLen );
	$x = (string)substr( $x, $rfcLen );

	if ( "" === $rfc ) {
		# No number follows: put the text back untouched
		$text = "RFC $blank$x";
	} else {
		$url = wfMsg( "rfcurl" );
		$url = str_replace( '$1', $rfc, $url );
		$sk =& $this->mOptions->getSkin();
		$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
		$text = "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
	}
	return $text;
}
1420
# Transforms wikitext before it is saved.
#
# Runs strip(), then pstPass2(), then unstrip() with a local strip state, so
# the sections removed by strip() are not touched by the pstPass2() rewrites.
#
# $text:       wikitext to transform
# $title:      title of the page being saved (stored in $this->mTitle)
# $user:       user doing the save, passed on to pstPass2()
# $options:    ParserOptions (stored in $this->mOptions)
# $clearState: reset the parser state first (default true)
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
	$this->mOptions = $options;
	$this->mTitle = $title;
	if ( $clearState ) {
		# Must be a method call; a bare "$this->clearState" is only a
		# property read and leaves the previous state in place.
		$this->clearState();
	}

	$stripState = false;
	$text = $this->strip( $text, $stripState, false );
	$text = $this->pstPass2( $text, $user );
	$text = $this->unstrip( $text, $stripState );
	return $text;
}
1435
# Second pass of the pre-save transform. In order:
#  - expands ~~~~ (signature plus timestamp) and ~~~ (signature only)
#  - completes context links: [[|name]], [[name (context)|]],
#    [[namespace:page|]] and [[ns:page (cont)|]]
#  - substitutes MAG_SUBST variables via wfReplaceSubstVar
#  - trims trailing whitespace, then removes the MAG_END marker
#
# $text: wikitext to transform
# $user: user whose name and "nickname" option form the signature
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
	global $wgLang, $wgLocaltimezone;

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( "nickname" );
	if ( "" == $k ) { $k = $n; }
	if(isset($wgLocaltimezone)) {
		$oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone");
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
	  " (" . date( "T" ) . ")";
	if(isset($wgLocaltimezone)) putenv("TZ=$oldtz");

	# The tildes are literal, so use str_replace(): with preg_replace() a
	# "$1" or "\1" in the user name or nickname would be taken as a
	# backreference in the replacement. ~~~~ must be handled before ~~~.
	$userLink = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( "~~~~", $userLink . " " . $d, $text );
	$text = str_replace( "~~~", $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/";		# [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/";					# [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/";		# [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
													# [[ns:page (cont)|]]
	# The context is the parenthesised suffix of the current title, if any
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	# Most specific pattern first
	$text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
	$text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
	$text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

	if ( "" == $context ) {
		$text = preg_replace( $p2, "[[\\1]]", $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# {{SUBST:xxx}} variables
	#
	$mw =& MagicWord::get( MAG_SUBST );
	$text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
1499
1500
1501 }
1502
# Holds the result of a parse: the rendered text plus the language links,
# category links and "contains old magic" flag that go with it.
class ParserOutput
{
	var $mText;
	var $mLanguageLinks;
	var $mCategoryLinks;
	var $mContainsOldMagic;

	# Constructor; every argument is optional and is stored as given.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCategoryLinks = $categoryLinks;
		$this->mLanguageLinks = $languageLinks;
		$this->mText = $text;
	}

	# Accessors
	function getText()
	{
		return $this->mText;
	}

	function getLanguageLinks()
	{
		return $this->mLanguageLinks;
	}

	function getCategoryLinks()
	{
		return $this->mCategoryLinks;
	}

	function containsOldMagic()
	{
		return $this->mContainsOldMagic;
	}

	# Mutators; each one returns whatever wfSetVar() returns
	function setText( $text )
	{
		return wfSetVar( $this->mText, $text );
	}

	function setLanguageLinks( $ll )
	{
		return wfSetVar( $this->mLanguageLinks, $ll );
	}

	function setCategoryLinks( $cl )
	{
		return wfSetVar( $this->mCategoryLinks, $cl );
	}

	function setContainsOldMagic( $com )
	{
		return wfSetVar( $this->mContainsOldMagic, $com );
	}
}
1525
# Bundle of the settings that influence a parse. Values are copied from the
# $wg* globals and from a user's options by initialiseFromUser().
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mPrintable;               # Generate printable output
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Accessors
	function getUseTeX() { return $this->mUseTeX; }
	function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
	function getUseDynamicDates() { return $this->mUseDynamicDates; }
	function getInterwikiMagic() { return $this->mInterwikiMagic; }
	function getAllowExternalImages() { return $this->mAllowExternalImages; }
	function getSkin() { return $this->mSkin; }
	function getDateFormat() { return $this->mDateFormat; }
	function getEditSection() { return $this->mEditSection; }
	function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
	function getPrintable() { return $this->mPrintable; }
	function getNumberHeadings() { return $this->mNumberHeadings; }
	function getShowToc() { return $this->mShowToc; }

	# Mutators; each one returns whatever wfSetVar() / wfSetRef() returns
	function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
	function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
	function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
	function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
	function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
	function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
	function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
	function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
	function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
	function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
	function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
	function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

	# Creates a ParserOptions initialised from $user (see initialiseFromUser).
	/* static */ function newFromUser( &$user )
	{
		$popts = new ParserOptions;
		# initialiseFromUser() already takes its argument by reference, so no
		# "&" at the call site (call-time pass-by-reference is not allowed in
		# newer PHP versions).
		$popts->initialiseFromUser( $user );
		return $popts;
	}

	# Fills every option from the global settings and from $userInput's skin
	# and options. A false/null $userInput is replaced by a new User object.
	# Printable output is always switched off.
	function initialiseFromUser( &$userInput )
	{
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( !$userInput ) {
			$user = new User;
		} else {
			$user =& $userInput;
		}

		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( "date" );
		$this->mEditSection = $user->getOption( "editsection" );
		$this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
		$this->mPrintable = false;
		$this->mNumberHeadings = $user->getOption( "numberheadings" );
		$this->mShowToc = $user->getOption( "showtoc" );
	}


}
1601
# Regex callbacks, passed to substituteCallback() by replaceVariables()
1603
# Callback for MAG_MSG: returns the text of the message named by $matches[1].
# Only the dangerous HTML is stripped, which is necessary because
# replaceVariables is called after removeHTMLtags and message text can come
# from any user. Internal links are expanded while the link cache is
# suspended, and the message's own page is then recorded in the link cache.
function wfReplaceMsgVar( $matches ) {
	global $wgCurOut, $wgLinkCache;

	$msgName = $matches[1];
	$sanitized = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

	$wgLinkCache->suspend();
	$expanded = $wgCurOut->replaceInternalLinks( $sanitized );
	$wgLinkCache->resume();

	$msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
	$wgLinkCache->addLinkObj( $msgTitle );

	return $expanded;
}
1616
# Callback for MAG_MSGNW: returns the text of the message named by
# $matches[1], escaped with wfEscapeWikiText().
# Effective <nowiki></nowiki>
# Not real <nowiki> because this is called after nowiki sections are processed
# The message's own page is recorded in the link cache.
function wfReplaceMsgnwVar( $matches ) {
	# $wgCurOut is not needed here; only the link cache is touched
	global $wgLinkCache;
	$text = wfEscapeWikiText( wfMsg( $matches[1] ) );
	$wgLinkCache->addLinkObj( Title::makeTitle( NS_MEDIAWIKI, $matches[1] ) );
	return $text;
}
1625
1626
1627
1628 ?>