disable RFC autolinking inside of [[links]]
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 include_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Converts wikitext to HTML.
8 #
9 # Globals used:
10 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurOut
11 #
12 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
13 #
14 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
15 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
16 # $wgLocaltimezone
17 #
18 # * only within ParserOptions
19
20 class Parser
21 {
22 # Cleared with clearState():
23 var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState;
24
25 # Temporary:
26 var $mOptions, $mTitle;
27
# Constructor: a new parser always begins with freshly reset state.
function Parser()
{
	$this->clearState();
}
32
# Puts every per-parse member (the ones declared under
# "Cleared with clearState()") back to its initial value.
function clearState()
{
	$this->mStripState = false;
	$this->mDTopen = false;
	$this->mLastSection = "";
	$this->mAutonumber = 0;
	$this->mOutput = new ParserOutput;
}
41
42 # First pass--just handle <nowiki> sections, pass the rest off
43 # to doWikiPass2() which does all the real work.
44 #
45 # Returns a ParserOutput
46 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true )
{
	# $text        wikitext to convert
	# $title       title object of the page being parsed (kept by reference in mTitle)
	# $options     options object, stored in mOptions for the duration of the parse
	# $linestart   passed through to doWikiPass2()
	# $clearState  when true, clearState() is called first
	#
	# Pipeline: strip() hides <nowiki>/<math>/<pre> sections behind placeholders,
	# doWikiPass2() converts the remaining wikitext, unstrip() puts the hidden
	# sections back. The resulting HTML is stored in mOutput, which is returned.
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;

	$text = $this->strip( $text, $this->mStripState, true );
	$text = $this->doWikiPass2( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
68
# Returns a random lowercase hex string built from two mt_rand() draws.
/* static */ function getRandomString()
{
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
73
74 # Strips <nowiki>, <pre> and <math>
75 # Returns the text, and fills an array with data needed in unstrip()
76 #
function strip( $text, &$state, $render = true )
{
	# $text    wikitext to scan
	# $state   (out) overwritten with the bookkeeping unstrip() needs: for each of
	#          nowiki/math/pre a unique marker string, a section counter and the
	#          list of saved section contents
	# $render  true: save display-ready HTML for each section;
	#          false: save the section wrapped in its original tags
	#
	# Returns the text with every stripped section replaced by
	# "<unique marker><section number as 8 hex digits>".
	$state = array(
		'nwlist' => array(),
		'nwsecs' => 0,
		'nwunq' => Parser::getRandomString(),
		'mathlist' => array(),
		'mathsecs' => 0,
		'mathunq' => Parser::getRandomString(),
		'prelist' => array(),
		'presecs' => 0,
		'preunq' => Parser::getRandomString()
	);

	$stripped = "";
	$stripped2 = "";
	$stripped3 = "";

	# Replace any instances of the placeholders
	$text = str_replace( $state['nwunq'], wfHtmlEscapeFirst( $state['nwunq'] ), $text );
	$text = str_replace( $state['mathunq'], wfHtmlEscapeFirst( $state['mathunq'] ), $text );
	$text = str_replace( $state['preunq'], wfHtmlEscapeFirst( $state['preunq'] ), $text );

	# Pass 1: <nowiki>
	while ( "" != $text ) {
		$p = preg_split( "/<\\s*nowiki\\s*>/i", $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$text = "";
		} else {
			$q = preg_split( "/<\\/\\s*nowiki\\s*>/i", $p[1], 2 );
			++$state['nwsecs'];

			if ( $render ) {
				$state['nwlist'][$state['nwsecs']] = wfEscapeHTMLTagsOnly($q[0]);
			} else {
				$state['nwlist'][$state['nwsecs']] = "<nowiki>{$q[0]}</nowiki>";
			}

			$stripped .= $state['nwunq'] . sprintf("%08X", $state['nwsecs']);
			# An unclosed tag swallows the rest of the text, so nothing remains
			$text = isset( $q[1] ) ? $q[1] : "";
		}
	}

	# Pass 2: <math>, only when TeX support is switched on
	if( $this->mOptions->getUseTeX() ) {
		while ( "" != $stripped ) {
			$p = preg_split( "/<\\s*math\\s*>/i", $stripped, 2 );
			$stripped2 .= $p[0];
			if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
				$stripped = "";
			} else {
				$q = preg_split( "/<\\/\\s*math\\s*>/i", $p[1], 2 );
				++$state['mathsecs'];

				if ( $render ) {
					$state['mathlist'][$state['mathsecs']] = renderMath($q[0]);
				} else {
					$state['mathlist'][$state['mathsecs']] = "<math>{$q[0]}</math>";
				}

				$stripped2 .= $state['mathunq'] . sprintf("%08X", $state['mathsecs']);
				$stripped = isset( $q[1] ) ? $q[1] : "";
			}
		}
	} else {
		$stripped2 = $stripped;
	}

	# Pass 3: <pre>
	while ( "" != $stripped2 ) {
		$p = preg_split( "/<\\s*pre\\s*>/i", $stripped2, 2 );
		$stripped3 .= $p[0];
		if ( ( count( $p ) < 2 ) || ( "" == $p[1] ) ) {
			$stripped2 = "";
		} else {
			$q = preg_split( "/<\\/\\s*pre\\s*>/i", $p[1], 2 );
			++$state['presecs'];

			if ( $render ) {
				$state['prelist'][$state['presecs']] = "<pre>". wfEscapeHTMLTagsOnly($q[0]). "</pre>\n";
			} else {
				$state['prelist'][$state['presecs']] = "<pre>{$q[0]}</pre>";
			}

			$stripped3 .= $state['preunq'] . sprintf("%08X", $state['presecs']);
			$stripped2 = isset( $q[1] ) ? $q[1] : "";
		}
	}
	return $stripped3;
}
165
# Puts back the sections that strip() replaced with placeholders.
# Order of restoration: pre, then math, then nowiki.
function unstrip( $text, &$state )
{
	foreach ( array( 'pre', 'math', 'nw' ) as $kind ) {
		for ( $n = 1; $n <= $state[$kind . 'secs']; ++$n ) {
			$placeholder = $state[$kind . 'unq'] . sprintf( "%08X", $n );
			$text = str_replace( $placeholder, $state[$kind . 'list'][$n], $text );
		}
	}
	return $text;
}
181
# Builds the listing appended to a category page: subcategories first,
# then the member articles. Returns "" when category magic is disabled
# or the current title is not a category page.
function categoryMagic ()
{
	global $wgLang ;
	# Always hand back a string; the caller appends the result to the page text
	if ( !$this->mOptions->getUseCategoryMagic() ) return "" ;
	$id = $this->mTitle->getArticleID() ;
	$cat = ucfirst ( wfMsg ( "category" ) ) ;
	$ti = $this->mTitle->getText() ;
	$ti = explode ( ":" , $ti , 2 ) ;
	if ( $cat != $ti[0] ) return "" ;
	$r = "<br break=all>\n" ;

	$articles = array() ;
	$children = array() ;

	# The skin comes from the parser options like everywhere else in this
	# class; the parser must not reach for $wgUser.
	$sk =& $this->mOptions->getSkin() ;

	# Hard-wired switch between the two link tables that can be queried
	$doesexist = false ;
	if ( $doesexist ) {
		$sql = "SELECT l_from FROM links WHERE l_to={$id}" ;
	} else {
		$sql = "SELECT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ;
	}

	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		if ( $doesexist ) {
			$t = $x->l_from ;
		} else {
			# Rebuild "Namespace:Title" from the two columns
			$t = $wgLang->getNsText ( $x->cur_namespace ) ;
			if ( $t != "" ) $t .= ":" ;
			$t .= $x->cur_title ;
		}

		# Pages that are themselves categories are listed as subcategories
		$y = explode ( ":" , $t , 2 ) ;
		if ( count ( $y ) == 2 && $y[0] == $cat ) {
			array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ;
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Children
	if ( count ( $children ) > 0 )
	{
		asort ( $children ) ;
		$r .= "<h2>".wfMsg("subcategories")."</h2>\n" ;
		$r .= implode ( ", " , $children ) ;
	}

	# Articles
	if ( count ( $articles ) > 0 )
	{
		asort ( $articles ) ;
		$h = wfMsg( "category_header", $ti[1] );
		$r .= "<h2>{$h}</h2>\n" ;
		$r .= implode ( ", " , $articles ) ;
	}

	return $r ;
}
251
# Returns the whitelist of attribute names that may survive in
# user-supplied HTML tags. Each name appears exactly once.
function getHTMLattrs ()
{
	$htmlattrs = array( # Allowed attributes--no scripting, etc.
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", /* BR */ "noshade", /* HR */
		"cite", /* BLOCKQUOTE, Q */ "size", "face", "color",
		/* FONT */ "type", "start", "value", "compact",
		/* For various lists, mostly deprecated but safe */
		"summary", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan", /* Tables */
		"id", "class", "name", "style" /* For CSS */
	);
	return $htmlattrs ;
}
268
# Filters the attribute part of an HTML tag: attributes whose name is not
# in getHTMLattrs() are removed, the rest are kept as name or name=value.
# If the result carries a style attribute containing "expression", a URL
# scheme or "url(", all attributes are dropped.
function fixTagAttributes ( $t )
{
	if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-)
	$htmlattrs = $this->getHTMLattrs() ;

	# Strip non-approved attributes from the tag.
	# The attribute values are copied verbatim and never evaluated: the
	# text is split at every name[=value] occurrence and reassembled from
	# the pieces in between plus the approved occurrences.
	$pattern = "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/" ;
	$gaps = preg_split( $pattern, $t ) ;
	preg_match_all( $pattern, $t, $found, PREG_SET_ORDER ) ;

	$clean = "" ;
	foreach ( $gaps as $i => $gap ) {
		$clean .= $gap ;
		# The last gap has no attribute following it
		if ( !isset( $found[$i] ) ) continue ;

		$name = $found[$i][1] ;
		if ( in_array( strtolower( $name ), $htmlattrs ) ) {
			$clean .= $name ;
			# Group 3 is absent for a bare attribute without a value
			if ( isset( $found[$i][3] ) && $found[$i][3] !== "" ) {
				$clean .= "=" . $found[$i][3] ;
			}
		}
	}
	$t = $clean ;

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped

	if( preg_match(
		"/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is",
		wfMungeToUtf8( $t ) ) )
	{
		$t="";
	}

	return trim ( $t ) ;
}
291
# Converts wiki table markup to HTML, line by line:
#   {| attrs   opens a table          |}        closes it
#   |- attrs   starts a new row       |+ text   caption
#   | cell     data cell              ! cell    header cell
# Several cells may share a line, separated by || (or !! for headers).
# Tables may nest; one entry per open table is kept on each of the four stacks.
# Anything still open at the end of the text is closed.
function doTableStuff ( $t )
{
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = rtrim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( "{|" == substr ( $x , 0 , 2 ) )
		{
			# Attributes start right after the two-character marker;
			# fixTagAttributes() trims any separating blank itself
			$t[$k] = "<table " . $this->fixTagAttributes ( substr ( $x , 2 ) ) . ">" ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , "" ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( "|}" == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = "" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = "</tr>" . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			# The <tr> itself is written when the first cell of the row arrives
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , "" ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( "|+" == substr ( $x , 0 , 2 ) )
			{
				$fc = "+" ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ;
			$after = explode ( "||" , $after ) ;
			$t[$k] = "" ;
			foreach ( $after AS $theline )
			{
				$z = "" ;
				if ( $fc != "+" )
				{
					# Cells need an open row; captions do not
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , "" ) ;
				}

				# Close the previous cell of this table, if any
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == "|" ) $l = "TD" ;
				else if ( $fc == "!" ) $l = "TH" ;
				else if ( $fc == "+" ) $l = "CAPTION" ;
				else $l = "" ;
				array_push ( $ltd , $l ) ;
				# "attrs|content" or just "content"
				$y = explode ( "|" , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close with the tag that was actually opened (TD, TH or CAPTION)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = "</tr>" ;
		array_pop ( $ltr ) ;
		$t[] = "</table>" ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
391
392 # Well, OK, it's actually about 14 passes. But since all the
393 # hard lifting is done inside PHP's regex code, it probably
394 # wouldn't speed things up much to add a real parser.
395 #
function doWikiPass2( $text, $linestart )
{
	# Runs the conversion passes over already-stripped wikitext, in a fixed
	# order: HTML clean-up, variables, headings, block levels, optional date
	# reformatting, external links, internal links, tables, heading
	# formatting, skin transformation, and finally the category listing.
	# $linestart is handed to doBlockLevels().
	$fname = "Parser::doWikiPass2";
	wfProfileIn( $fname );

	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text );

	# $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
	$text = str_replace ( "<HR>", "<hr>", $text );

	$text = $this->doHeadings( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	if($this->mOptions->getUseDynamicDates()) {
		global $wgDateFormatter;
		$text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text );
	}

	# External links must be replaced before internal ones
	$text = $this->replaceExternalLinks( $text );
	$text = $this->replaceInternalLinks ( $text );
	$text = $this->doTableStuff ( $text ) ;

	$text = $this->formatHeadings( $text );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );
	$text .= $this->categoryMagic () ;

	wfProfileOut( $fname );
	return $text;
}
428
429
# Turns "== title ==" style lines into <h1>..<h6>.
# Works from six "=" down to one, one regex pass per level.
/* private */ function doHeadings( $text )
{
	$i = 6;
	while ( $i >= 1 ) {
		$h = str_repeat( "=", $i );
		$text = preg_replace( "/^{$h}([^=]+){$h}(\\s|$)/m",
			"<h{$i}>\\1</h{$i}>\\2", $text );
		--$i;
	}
	return $text;
}
439
440 # Note: we have to do external links before the internal ones,
441 # and otherwise take great care in the order of things here, so
442 # that we don't end up interpreting some URLs twice.
443
/* private */ function replaceExternalLinks( $text )
{
	# Runs subReplaceExternalLinks() once per supported protocol, in the
	# listed order. The flag is that function's $autonumber argument and
	# is set for http and https only.
	$fname = "Parser::replaceExternalLinks";
	wfProfileIn( $fname );

	$protocols = array(
		"http" => true,
		"https" => true,
		"ftp" => false,
		"irc" => false,
		"gopher" => false,
		"news" => false,
		"mailto" => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}

	wfProfileOut( $fname );
	return $text;
}
458
# Links every URL of one protocol in $s.
# Stage 1 handles bare URLs (optionally turning image URLs into image tags),
# stage 2 handles the bracketed forms [url] and [url label].
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber )
{
	# Stand-in for the protocol while stage 1 runs, so a URL that was
	# already converted is not matched a second time
	$unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3";
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# Separators: part of a URL when they occur inside it
	# ("www.foo.com/123,2342,32.htm"), but not when they are its last
	# character ("go to www.foo.com, where ..").
	$sep = ",;\.:";
	$fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF";
	$images = "gif|png|jpg|jpeg";

	# NOTE: the curly braces below are PHP string interpolation, not
	# part of the regular expressions.
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	# Stage 1a: image URLs become image tags (only for the autonumbered protocols)
	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) {
		$imageTag = $sk->makeImage( "{$unique}:\\3" .
			"/\\4.\\5", "\\4.\\5" );
		$s = preg_replace( $e1, "\\1" . $imageTag . "\\6", $s );
	}

	# Stage 1b: remaining bare URLs become plain links
	$anchor = "\\1" . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		"</a>\\5";
	$s = preg_replace( $e2, $anchor, $s );
	$s = str_replace( $unique, $protocol, $s );

	# Stage 2: cut the text at every "[protocol:"; the first chunk is
	# ordinary text, every later chunk starts with the rest of a URL
	$chunks = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $chunks );
	$s = substr( $s, 1 );

	$bare = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$labelled = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $chunks as $chunk ) {
		if ( preg_match( $bare, $chunk, $hit ) ) {
			# [url]
			$link = "{$protocol}:{$hit[1]}";
			$trail = $hit[2];
			if ( $autonumber ) {
				$label = "[" . ++$this->mAutonumber . "]";
			} else {
				$label = wfEscapeHTML( $link );
			}
		} else if ( preg_match( $labelled, $chunk, $hit ) ) {
			# [url label]
			$link = "{$protocol}:{$hit[1]}";
			$label = $hit[2];
			$trail = $hit[3];
		} else {
			# Not a well-formed bracket link: put the text back untouched
			$s .= "[{$protocol}:" . $chunk;
			continue;
		}

		$paren = $this->mOptions->getPrintable()
			? " (<i>" . htmlspecialchars ( $link ) . "</i>)"
			: "";
		$la = $sk->getExternalLinkAttributes( $link, $label );
		$s .= "<a href='{$link}'{$la}>{$label}</a>{$paren}{$trail}";
	}
	return $s;
}
521
# Handles a ''' token: opens bold when it is closed, closes it otherwise.
# $state["strong"] / $state["em"] hold the token position at which the
# style was opened, or FALSE while it is closed.
/* private */ function handle3Quotes( &$state, $token )
{
	if ( !$state["strong"] ) {
		$state["strong"] = $token["pos"];
		return "<strong>";
	}

	# Italics opened after the bold, e.g. ''' lala ''lala ''':
	# close them first and reopen them after the bold has ended
	$italicsInside = ( $state["em"] && $state["em"] > $state["strong"] );
	$state["strong"] = FALSE;
	return $italicsInside ? "</em></strong><em>" : "</strong>";
}
539
# Handles a '' token: opens italics when they are closed, closes them otherwise.
# $state["em"] / $state["strong"] hold the token position at which the
# style was opened, or FALSE while it is closed.
/* private */ function handle2Quotes( &$state, $token )
{
	if ( !$state["em"] ) {
		$state["em"] = $token["pos"];
		return "<em>";
	}

	# Bold opened after the italics, e.g. ''lala'''lala'' ....''':
	# close it first and reopen it after the italics have ended
	$boldInside = ( $state["strong"] && $state["strong"] > $state["em"] );
	$state["em"] = FALSE;
	return $boldInside ? "</strong></em><strong>" : "</em>";
}
557
# Handles a ''''' token, which toggles bold and italics together.
# $state["em"] / $state["strong"] hold the token position at which the
# style was opened, or FALSE while it is closed.
/* private */ function handle5Quotes( &$state, $token )
{
	if ( $state["em"] && $state["strong"] ) {
		# Both open: close the one that was opened last first
		if ( $state["em"] < $state["strong"] ) {
			$s = "</strong></em>";
		} else {
			$s = "</em></strong>";
		}
		$state["strong"] = $state["em"] = FALSE;
	} elseif ( $state["em"] ) {
		# Only italics open: close them, open bold
		$s = "</em><strong>";
		$state["em"] = FALSE;
		$state["strong"] = $token["pos"];
	} elseif ( $state["strong"] ) {
		# Only bold open: close it, open italics
		$s = "</strong><em>";
		$state["strong"] = FALSE;
		$state["em"] = $token["pos"];
	} else { # not $em and not $strong
		$s = "<strong><em>";
		$state["strong"] = $state["em"] = $token["pos"];
	}
	return $s;
}
581
# Walks the token stream of $str and renders [[links]], quote markup,
# horizontal rules and the RFC / ISBN magic words.
# While a [[ is open, rendered pieces are parked on a stack instead of
# being written out; the matching ]] glues them together and hands the
# result to handleInternalLink().
/* private */ function replaceInternalLinks( $str )
{
	global $wgLang; # for language specific parser hook

	$tokenizer=Tokenizer::newFromString( $str );
	$pending = array();      # the open [[ tokens and the pieces collected after them

	$out="";
	$state = array( "em" => FALSE, "strong" => FALSE );
	$insideLink = FALSE;

	# Each call to nextToken() hands back the next token of the text
	while ( $token = $tokenizer->nextToken() )
	{
		switch ( $token["type"] )
		{
			case "text":
				# plain text, no markup
				$piece = $token["text"];
				break;
			case "[[":
				# link opener; openers that never get closed are flushed after the loop
				$insideLink = TRUE;
				array_push( $pending, $token );
				$piece="";
				break;
			case "]]":
				if ( count( $pending ) == 0 )
				{
					# nothing open: a stray ]] is ordinary text
					$piece = "]]";
				} else {
					# collect everything back to the nearest opener
					$linkText = "";
					$opener = array_pop( $pending );
					while ( $opener["type"] != "[[" )
					{
						$linkText = $opener["text"] . $linkText;
						$opener = array_pop( $pending );
					}
					$piece = $linkText ."]]";
					$prefix = $opener["text"];
					$upcoming = $tokenizer->previewToken();
					if ( $upcoming["type"] == "text" )
					{
						# previewToken() only looks; consume the text so it
						# can serve as the link trail
						$upcoming = $tokenizer->nextToken();
						$piece .= $upcoming["text"];
					}
					$piece = $this->handleInternalLink( $piece, $prefix );
				}
				$insideLink = (count( $pending ) != 0);
				break;
			case "----":
				$piece = "\n<hr>\n";
				break;
			case "'''":
				# this and the next two cases handle quote markup
				$piece = $this->handle3Quotes( $state, $token );
				break;
			case "''":
				$piece = $this->handle2Quotes( $state, $token );
				break;
			case "'''''":
				$piece = $this->handle5Quotes( $state, $token );
				break;
			case "":
				# empty token
				$piece="";
				break;
			case "RFC ":
				# no RFC autolinking inside of a [[link]]
				if ( $insideLink ) {
					$piece = "RFC ";
				} else {
					$piece = $this->doMagicRFC( $tokenizer );
				}
				break;
			case "ISBN ":
				$piece = $this->doMagicISBN( $tokenizer );
				break;
			default:
				# give the language specific hook a chance
				$piece = $wgLang->processToken( $token, $pending );
				if ( NULL == $piece ) {
					# unknown token: make it stand out
					$piece = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
					$piece .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
				}
				break;
		}

		# Inside a link the piece is parked as a text token until its ]]
		# arrives; outside it goes straight to the output.
		if ( $insideLink && $piece != "" ) {
			$token["type"] = "text";
			$token["text"] = $piece;
			array_push( $pending, $token );
		} else {
			$out .= $piece;
		}
	} #end while

	if ( count( $pending ) != 0 )
	{
		# [[ without a closing ]]: write the parked pieces back in order
		$leftover = "";
		while ( $parked = array_pop( $pending ) )
		{
			$leftover = ( $parked["type"] == "text" ? $parked["text"] : $parked["type"] ) . $leftover;
		}
		$out .= $leftover;
	}
	return $out;
}
704
# Renders a single internal link.
# $line    the link body, its closing "]]" and any trailing text, i.e.
#          "target|label]]trail" (the opening "[[" is not included)
# $prefix  text that belongs directly in front of the link
# Returns the HTML for prefix, link and trail. Input that does not form a
# valid link is returned as literal text with the "[[" put back.
/* private */ function handleInternalLink( $line, $prefix )
{
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages;
	static $fname = "Parser::handleInternalLink" ;
	wfProfileIn( $fname );

	wfProfileIn( "$fname-setup" );
	static $tc = FALSE;
	if ( !$tc ) { $tc = Title::legalChars() . "#"; }
	$sk =& $this->mOptions->getSkin();

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }

	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = wfMsg ( "category" ) ; }

	$curNs = $this->mTitle->getNamespace();
	$nottalk = !Namespace::isTalk( $curNs );

	wfProfileOut( "$fname-setup" );

	if ( !preg_match( $e1, $line, $m ) ) {
		# Invalid form; output directly
		wfProfileOut( $fname );
		return $prefix . "[[" . $line ;
	}
	# page with normal text or alt
	$text = $m[2];
	$trail = $m[3];

	/* Valid link forms:
	Foobar -- normal
	:Foobar -- override special treatment of prefix (images, language links)
	/Foobar -- convert to CurrentPage/Foobar
	/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
	*/
	$c = substr($m[1],0,1);
	$noforce = ($c != ":");
	if( $c == "/" ) { # subpage
		if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown
			$m[1]=substr($m[1],1,strlen($m[1])-2);
			$noslash=$m[1];
		} else {
			$noslash=substr($m[1],1);
		}
		# A namespace missing from the table simply has no subpages
		if( !empty( $wgNamespacesWithSubpages[$curNs] ) ) { # subpages allowed here
			$link = $this->mTitle->getPrefixedText(). "/" . trim($noslash);
			if( "" == $text ) {
				$text= $m[1];
			} # this might be changed for ugliness reasons
		} else {
			$link = $noslash; # no subpage allowed, use standard link
		}
	} elseif( $noforce ) { # no subpage
		$link = $m[1];
	} else {
		$link = substr( $m[1], 1 );
	}
	if( "" == $text )
		$text = $link;

	$nt = Title::newFromText( $link );
	if( !$nt ) {
		wfProfileOut( $fname );
		return $prefix . "[[" . $line;
	}
	$ns = $nt->getNamespace();
	$iw = $nt->getInterWiki();
	if( $noforce ) {
		# The interwiki setting is read from the parser options, like the
		# other settings this class uses.
		# NOTE(review): assumes ParserOptions provides getInterwikiMagic() - confirm.
		if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
			# Language link: recorded on the output, nothing shown inline
			array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
			wfProfileOut( $fname );
			return $prefix . $trail;
		}
		if( $ns == $image ) {
			$s = $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			wfProfileOut( $fname );
			return $s;
		}
	}
	# A link to the page itself (without a section anchor) is shown in bold.
	# strpos() may legitimately return 0, hence the strict comparison.
	if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
		( strpos( $link, "#" ) === FALSE ) ) {
		wfProfileOut( $fname );
		return $prefix . "<strong>" . $text . "</strong>" . $trail;
	}
	if ( $ns == $category && $this->mOptions->getUseCategoryMagic() ) {
		$t = explode ( ":" , $nt->getText() ) ;
		array_shift ( $t ) ;
		$t = implode ( ":" , $t ) ;
		$t = $wgLang->ucFirst ( $t ) ;
		$nnt = Title::newFromText ( $category.":".$t ) ;
		$t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix );
		# The category link is collected; only prefix and trail stay inline
		$this->mCategoryLinks[] = $t ;
		wfProfileOut( $fname );
		return $prefix . $trail ;
	}
	if( $ns == $media ) {
		$s = $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
		$wgLinkCache->addImageLinkObj( $nt );
		wfProfileOut( $fname );
		return $s;
	} elseif( $ns == $special ) {
		$s = $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail );
		wfProfileOut( $fname );
		return $s;
	}
	$s = $sk->makeLinkObj( $nt, $text, "", $trail , $prefix );

	wfProfileOut( $fname );
	return $s;
}
829
830 # Some functions here used by doBlockLevels()
831 #
# Ends the section recorded in mLastSection and resets that member.
# Returns the closing tag followed by a newline; for "p" and for no
# section at all only the newline is returned.
/* private */ function closeParagraph()
{
	$section = $this->mLastSection;
	$this->mLastSection = "";

	if ( 0 == strcmp( "p", $section ) || 0 == strcmp( "", $section ) ) {
		return "\n";
	}
	return "</" . $section . ">" . "\n";
}
842 # getCommon() returns the length of the longest common substring
843 # of both arguments, starting at the beginning of both.
844 #
# Returns the number of leading characters $st1 and $st2 have in common
# (0 when either string is empty).
/* private */ function getCommon( $st1, $st2 )
{
	# Only positions present in both strings can match
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
856 # These next three functions open, continue, and close the list
857 # element appropriate to the prefix character passed into them.
858 #
# Opens the list that belongs to prefix character $char and its first
# item, after closing any open paragraph. An unknown character yields an
# error comment only.
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	switch ( $char ) {
		case "*":
			$result .= "<ul><li>";
			break;
		case "#":
			$result .= "<ol><li>";
			break;
		case ":":
			$result .= "<dl><dd>";
			break;
		case ";":
			$result .= "<dl><dt>";
			# remember that a definition term is open
			$this->mDTopen = true;
			break;
		default:
			$result = "<!-- ERR 1 -->";
	}

	return $result;
}
874
# Closes the current list item and opens the next one for prefix
# character $char. In definition lists mDTopen tracks whether the open
# element is a term (<dt>) or a definition (<dd>).
/* private */ function nextItem( $char )
{
	switch ( $char ) {
		case "*":
		case "#":
			return "</li><li>";
		case ":":
		case ";":
			$close = $this->mDTopen ? "</dt>" : "</dd>";
			if ( ";" == $char ) {
				$this->mDTopen = true;
				return $close . "<dt>";
			}
			$this->mDTopen = false;
			return $close . "<dd>";
	}
	return "<!-- ERR 2 -->";
}
891
# Closes the innermost item and the list that belongs to prefix character
# $char. Handles "*", "#" and ":"; anything else yields an error comment.
/* private */function closeList( $char )
{
	switch ( $char ) {
		case "*":
			$text = "</li></ul>";
			break;
		case "#":
			$text = "</li></ol>";
			break;
		case ":":
			if ( $this->mDTopen ) {
				# a term is still open: close that instead of a definition
				$this->mDTopen = false;
				$text = "</dt></dl>";
			} else {
				$text = "</dd></dl>";
			}
			break;
		default:
			return "<!-- ERR 3 -->";
	}
	return $text."\n";
}
907
# Line-by-line pass that produces the block structure: lists from lines
# starting with * # : ; and paragraph / preformatted sections from the rest.
# $linestart  when false, the first line is copied through unprocessed
/* private */ function doBlockLevels( $text, $linestart )
{
	$fname = "Parser::doBlockLevels";
	wfProfileIn( $fname );
	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$a = explode( "\n", $text );
	$text = $lastPref = "";
	$this->mDTopen = $inBlockElem = false;
	# Defined up front: both are read after the loop, even if it never runs
	$npl = 0;
	$pref2 = "";

	if ( ! $linestart ) { $text .= array_shift( $a ); }
	foreach ( $a as $t ) {
		if ( "" != $text ) { $text .= "\n"; }

		$oLine = $t;
		$opl = strlen( $lastPref );
		$npl = strspn( $t, "*#:;" );
		$pref = substr( $t, 0, $npl );
		# ";" and ":" belong to the same kind of list
		$pref2 = str_replace( ";", ":", $pref );
		$t = substr( $t, $npl );

		if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
			# Same nesting as the previous line: just the next item
			$text .= $this->nextItem( substr( $pref, -1 ) );

			if ( ";" == substr( $pref, -1 ) ) {
				# "; term : definition" on one line
				$cpos = strpos( $t, ":" );
				if ( ! ( false === $cpos ) ) {
					$term = substr( $t, 0, $cpos );
					$text .= $term . $this->nextItem( ":" );
					$t = substr( $t, $cpos + 1 );
				}
			}
		} else if (0 != $npl || 0 != $opl) {
			# Nesting changed: close what no longer applies, open what is new
			$cpl = $this->getCommon( $pref, $lastPref );

			while ( $cpl < $opl ) {
				$text .= $this->closeList( $lastPref[$opl-1] );
				--$opl;
			}
			if ( $npl <= $cpl && $cpl > 0 ) {
				$text .= $this->nextItem( $pref[$cpl-1] );
			}
			while ( $npl > $cpl ) {
				$char = substr( $pref, $cpl, 1 );
				$text .= $this->openList( $char );

				if ( ";" == $char ) {
					$cpos = strpos( $t, ":" );
					if ( ! ( false === $cpos ) ) {
						$term = substr( $t, 0, $cpos );
						$text .= $term . $this->nextItem( ":" );
						$t = substr( $t, $cpos + 1 );
					}
				}
				++$cpl;
			}
			$lastPref = $pref2;
		}
		if ( 0 == $npl ) { # No prefix--go to paragraph mode
			if ( preg_match(
				"/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6)/i", $t ) ) {
				$text .= $this->closeParagraph();
				$inBlockElem = true;
			}
			if ( ! $inBlockElem ) {
				# A leading blank marks preformatted text. substr() is
				# used because the line may be empty.
				if ( " " == substr( $t, 0, 1 ) ) {
					$newSection = "pre";
					# $t = wfEscapeHTML( $t );
				}
				else { $newSection = "p"; }

				if ( 0 == strcmp( "", trim( $oLine ) ) ) {
					$text .= $this->closeParagraph();
					$text .= "<" . $newSection . ">";
				} else if ( 0 != strcmp( $this->mLastSection,
					$newSection ) ) {
					$text .= $this->closeParagraph();
					if ( 0 != strcmp( "p", $newSection ) ) {
						$text .= "<" . $newSection . ">";
					}
				}
				$this->mLastSection = $newSection;
			}
			if ( $inBlockElem &&
				preg_match( "/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6)/i", $t ) ) {
				$inBlockElem = false;
			}
		}
		$text .= $t;
	}
	# Close the lists the last line left open
	while ( $npl ) {
		$text .= $this->closeList( $pref2[$npl-1] );
		--$npl;
	}
	if ( "" != $this->mLastSection ) {
		if ( "p" != $this->mLastSection ) {
			$text .= "</" . $this->mLastSection . ">";
		}
		$this->mLastSection = "";
	}
	wfProfileOut( $fname );
	return $text;
}
1013
# Replaces the date/time and article-count variables and runs the
# callbacks for the MSG / MSGNW magic words.
/* private */ function replaceVariables( $text )
{
	global $wgLang, $wgCurOut;
	$fname = "Parser::replaceVariables";
	wfProfileIn( $fname );

	$magic = array();

	# Basic variables
	# See Language.php for the definition of each magic word
	# As with sigs, this uses the server's local time -- ensure
	# this is appropriate for your audience!

	# One timestamp for all date parts, so they always describe the same
	# instant even when the clock rolls over while this runs
	$now = time();

	$magic[MAG_CURRENTMONTH] = date( "m", $now );
	$magic[MAG_CURRENTMONTHNAME] = $wgLang->getMonthName( date( "n", $now ) );
	$magic[MAG_CURRENTMONTHNAMEGEN] = $wgLang->getMonthNameGen( date( "n", $now ) );
	$magic[MAG_CURRENTDAY] = date( "j", $now );
	$magic[MAG_CURRENTDAYNAME] = $wgLang->getWeekdayName( date( "w", $now )+1 );
	$magic[MAG_CURRENTYEAR] = date( "Y", $now );
	$magic[MAG_CURRENTTIME] = $wgLang->time( wfTimestampNow(), false );

	$this->mContainsOldMagic += MagicWord::replaceMultiple($magic, $text, $text);

	$mw =& MagicWord::get( MAG_NUMBEROFARTICLES );
	if ( $mw->match( $text ) ) {
		$v = wfNumberOfArticles();
		$text = $mw->replace( $v, $text );
		if( $mw->getWasModified() ) { $this->mContainsOldMagic++; }
	}

	# "Variables" with an additional parameter e.g. {{MSG:wikipedia}}
	# The callbacks are at the bottom of this file
	$wgCurOut = $this;
	$mw =& MagicWord::get( MAG_MSG );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	$mw =& MagicWord::get( MAG_MSGNW );
	$text = $mw->substituteCallback( $text, "wfReplaceMsgnwVar" );
	if( $mw->getWasModified() ) { $this->mContainsNewMagic++; }

	wfProfileOut( $fname );
	return $text;
}
1058
# Cleans up HTML, removes dangerous tags and attributes.
#
# The text is split on "<". Each piece that starts with a whitelisted tag
# is kept (its attributes passed through fixTagAttributes()) as long as
# it nests correctly; every other piece is emitted with its "<" and ">"
# escaped. Paired tags still open at the end of the text are closed.
#
# Returns the cleaned text.
/* private */ function removeHTMLtags( $text )
{
    $fname = "Parser::removeHTMLtags";
    wfProfileIn( $fname );
    $htmlpairs = array( # Tags that must be closed
        "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
        "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
        "strike", "strong", "tt", "var", "div", "center",
        "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
        "ruby", "rt" , "rb" , "rp"
    );
    $htmlsingle = array(
        "br", "p", "hr", "li", "dt", "dd"
    );
    $htmlnest = array( # Tags that can be nested--??
        "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
        "dl", "font", "big", "small", "sub", "sup"
    );
    $tabletags = array( # Can only appear inside table
        "td", "th", "tr"
    );

    $htmlsingle = array_merge( $tabletags, $htmlsingle );
    $htmlelements = array_merge( $htmlsingle, $htmlpairs );

    # Remove HTML comments
    $text = preg_replace( "/<!--.*-->/sU", "", $text );

    $bits = explode( "<", $text );
    $text = array_shift( $bits );
    $tagstack = array(); $tablestack = array();

    foreach ( $bits as $x ) {
        # Pieces that do not look like a tag leave every field empty and
        # therefore fall through to the escaping branch at the bottom.
        $slash = $t = $params = $brace = $rest = "";
        if ( preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
          $x, $regs ) ) {
            $slash = $regs[1];
            $t = $regs[2];
            $params = $regs[3];
            $brace = $regs[4];
            $rest = $regs[5];
        }

        $badtag = 0 ;
        if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
            # Check our stack
            if ( $slash ) {
                # Closing a tag...
                if ( ! in_array( $t, $htmlsingle ) &&
                  ( $ot = array_pop( $tagstack ) ) != $t ) {
                    # Mismatch: put the popped tag back. array_pop() gives
                    # NULL on an empty stack; never push that.
                    if ( !is_null( $ot ) ) {
                        array_push( $tagstack, $ot );
                    }
                    $badtag = 1;
                } else {
                    if ( $t == "table" ) {
                        $tagstack = array_pop( $tablestack );
                    }
                    $newparams = "";
                }
            } else {
                # Keep track for later
                if ( in_array( $t, $tabletags ) &&
                  ! in_array( "table", $tagstack ) ) {
                    $badtag = 1;
                } else if ( in_array( $t, $tagstack ) &&
                  ! in_array ( $t , $htmlnest ) ) {
                    $badtag = 1 ;
                } else if ( ! in_array( $t, $htmlsingle ) ) {
                    if ( $t == "table" ) {
                        # A table starts a fresh nesting context
                        array_push( $tablestack, $tagstack );
                        $tagstack = array();
                    }
                    array_push( $tagstack, $t );
                }
                # Strip non-approved attributes from the tag
                $newparams = $this->fixTagAttributes($params);

            }
            if ( ! $badtag ) {
                $rest = str_replace( ">", "&gt;", $rest );
                $text .= "<$slash$t $newparams$brace$rest";
                continue;
            }
        }
        $text .= "&lt;" . str_replace( ">", "&gt;", $x);
    }
    # Close off any remaining tags
    while ( $t = array_pop( $tagstack ) ) {
        $text .= "</$t>\n";
        if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
    }
    wfProfileOut( $fname );
    return $text;
}
1151
/*
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who may edit the page and
 *    have enabled the option
 * 3) Add a table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits
 * up the string and re-inserts the newly formatted headlines.
 *
 * Takes the HTML text of the page and returns it with the rebuilt headings
 * (and, where applicable, the TOC and a "top" anchor) inserted.
 *
 * */
/* private */ function formatHeadings( $text )
{
    $nh = $this->mOptions->getNumberHeadings();
    $st = $this->mOptions->getShowToc();
    if ( !$this->mTitle->userCanEdit() ) {
        $es = 0;
        $esr = 0;
    } else {
        $es = $this->mOptions->getEditSection();
        $esr = $this->mOptions->getEditSectionOnRightClick();
    }

    # Inhibit editsection links if requested in the page
    $esw =& MagicWord::get( MAG_NOEDITSECTION );
    if ( $esw->matchAndRemove( $text ) ) {
        $es = 0;
    }
    # if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
    # do not add TOC
    $mw =& MagicWord::get( MAG_NOTOC );
    if ( $mw->matchAndRemove( $text ) ) {
        $st = 0;
    }

    # never add the TOC to the Main Page. This is an entry page that should not
    # be more than 1-2 screens large anyway
    if ( $this->mTitle->getPrefixedText() == wfMsg( "mainpage" ) ) { $st = 0; }

    # We need this to perform operations on the HTML
    $sk =& $this->mOptions->getSkin();

    # Get all headlines for numbering them and adding funky stuff like [edit]
    # links
    preg_match_all( "/<H([1-6])(.*?>)(.*?)<\/H[1-6]>/i", $text, $matches );

    # headline counter
    $c = 0;

    # Working state; initialised explicitly rather than relying on
    # undefined variables evaluating to NULL.
    $level = 0;
    $prevlevel = 0;
    $toclevel = 0;
    $toclines = 0;
    $toc = "";
    $numbering = "";
    $dot = 0;
    $full = "";
    $h = array();        # per-level headline counters
    $refer = array();
    $refers = array();   # canonized headline => number of occurrences
    $refcount = array();
    $head = array();     # rebuilt headline HTML, indexed like $matches

    # Ugh .. the TOC should have neat indentation levels which can be
    # passed to the skin functions. These are determined here
    foreach ( $matches[3] as $headline ) {
        if ( $level ) { $prevlevel = $level; }
        $level = $matches[1][$c];
        if ( ( $nh || $st ) && $prevlevel && $level > $prevlevel ) {
            $h[$level] = 0; // reset when we enter a new level
            $toc .= $sk->tocIndent( $level - $prevlevel );
            $toclevel += $level - $prevlevel;
        }
        if ( ( $nh || $st ) && $level < $prevlevel ) {
            $h[$level+1] = 0; // reset when we step back a level
            $toc .= $sk->tocUnindent( $prevlevel - $level );
            $toclevel -= $prevlevel - $level;
        }
        if ( !isset( $h[$level] ) ) { $h[$level] = 0; }
        $h[$level]++; // count number of headlines for each level

        if ( $nh || $st ) {
            for ( $i = 1; $i <= $level; $i++ ) {
                if ( !empty( $h[$i] ) ) {
                    if ( $dot ) { $numbering .= "."; }
                    $numbering .= $h[$i];
                    $dot = 1;
                }
            }
        }

        // The canonized header is a version of the header text safe to use for links
        // Avoid insertion of weird stuff like <math> by expanding the relevant sections
        $canonized_headline = $this->unstrip( $headline, $this->mStripState );
        $canonized_headline = preg_replace( "/<.*?>/", "", $canonized_headline ); // strip out HTML
        $tocline = trim( $canonized_headline );
        $canonized_headline = str_replace( '"', "", $canonized_headline );
        $canonized_headline = str_replace( " ", "_", trim( $canonized_headline ) );
        $refer[$c] = $canonized_headline;
        // count how many in assoc. array so we can track dupes in anchors
        if ( !isset( $refers[$canonized_headline] ) ) {
            $refers[$canonized_headline] = 0;
        }
        $refers[$canonized_headline]++;
        $refcount[$c] = $refers[$canonized_headline];

        // Prepend the number to the heading text

        if ( $nh || $st ) {
            $tocline = $numbering . " " . $tocline;

            // Don't number the heading if it is the only one (looks silly)
            if ( $nh && count( $matches[3] ) > 1 ) {
                $headline = $numbering . " " . $headline; // the two are different if the line contains a link
            }
        }

        // Create the anchor for linking from the TOC to the section

        $anchor = $canonized_headline;
        if ( $refcount[$c] > 1 ) { $anchor .= "_" . $refcount[$c]; }
        if ( $st ) {
            $toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
        }
        $head[$c] = "";
        if ( $es ) {
            $head[$c] .= $sk->editSectionLink( $c + 1 );
        }

        // Put it all together

        $head[$c] .= "<h" . $level . $matches[2][$c]
            . "<a name=\"" . $anchor . "\">"
            . $headline
            . "</a>"
            . "</h" . $level . ">";

        // Add the edit section link

        if ( $esr ) {
            $head[$c] = $sk->editSectionScript( $c + 1, $head[$c] );
        }

        $numbering = "";
        $c++;
        $dot = 0;
    }

    if ( $st ) {
        $toclines = $c;
        $toc .= $sk->tocUnindent( $toclevel );
        $toc = $sk->tocTable( $toc );
    }

    // split up and insert constructed headlines

    $blocks = preg_split( "/<H[1-6].*?>.*?<\/H[1-6]>/i", $text );
    $i = 0;

    foreach ( $blocks as $block ) {
        if ( ( $es ) && $c > 0 && $i == 0 ) {
            # This is the [edit] link that appears for the top block of text when
            # section editing is enabled
            $full .= $sk->editSectionLink( 0 );
        }
        $full .= $block;
        if ( $st && $toclines > 3 && !$i ) {
            # Let's add a top anchor just in case we want to link to the top of the page
            $full = "<a name=\"top\"></a>" . $full . $toc;
        }

        # There is one more block than there are headlines
        if ( isset( $head[$i] ) ) {
            $full .= $head[$i];
        }
        $i++;
    }

    return $full;
}
1314
# Handles the magic word "ISBN".
#
# Looks at the token following it: tokens with an empty type are skipped;
# if the next token is a text token it is consumed, and a leading run of
# digits, dashes and capital letters is turned into a link to
# Special:Booksources. The rest of the token's text is appended unchanged.
#
# Returns the replacement text; plain "ISBN " when no text token follows.
/* private */ function doMagicISBN( &$tokenizer )
{
    # Check whether next token is a text token
    # If yes, fetch it and convert the text into a
    # Special::BookSources link
    $token = $tokenizer->previewToken();
    # The is_array() test stops the loop should the tokenizer hand back a
    # non-array (e.g. at end of input) instead of spinning forever.
    while ( is_array( $token ) && $token["type"] == "" )
    {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }
    if ( !is_array( $token ) || $token["type"] != "text" )
    {
        return "ISBN ";
    }

    $token = $tokenizer->nextToken();
    $x = (string)$token["text"];
    $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    # Split into leading blanks, the candidate number, and the remainder.
    # The (string) casts cover substr() returning false on older PHP.
    $len = strspn( $x, " " );
    $blank = (string)substr( $x, 0, $len );
    $x = (string)substr( $x, $len );

    $len = strspn( $x, $valid );
    $isbn = (string)substr( $x, 0, $len );
    $x = (string)substr( $x, $len );

    $num = str_replace( "-", "", $isbn );

    if ( $num === "" ) {
        # Nothing usable; give the text back, including any dashes that
        # were scanned as part of the candidate number.
        return "ISBN $blank$isbn$x";
    }

    $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" );
    return "<a href=\"" .
        $titleObj->getUrl( "isbn={$num}", false, true ) .
        "\" class=\"internal\">ISBN $isbn</a>" . $x;
}
# Handles the magic word "RFC".
#
# Looks at the token following it: tokens with an empty type are skipped;
# if the next token is a text token it is consumed, and a leading run of
# digits is turned into an external link built from the "rfcurl" message
# ($1 is replaced by the number). The rest of the token's text is
# appended unchanged.
#
# Returns the replacement text; plain "RFC " when no text token follows.
/* private */ function doMagicRFC( &$tokenizer )
{
    # Check whether next token is a text token
    # If yes, fetch it and convert the text into a
    # link to an RFC source
    $token = $tokenizer->previewToken();
    # The is_array() test stops the loop should the tokenizer hand back a
    # non-array (e.g. at end of input) instead of spinning forever.
    while ( is_array( $token ) && $token["type"] == "" )
    {
        $tokenizer->nextToken();
        $token = $tokenizer->previewToken();
    }
    if ( !is_array( $token ) || $token["type"] != "text" )
    {
        return "RFC ";
    }

    $token = $tokenizer->nextToken();
    $x = (string)$token["text"];
    $valid = "0123456789";

    # Split into leading blanks, the number, and the remainder.
    # The (string) casts cover substr() returning false on older PHP.
    $len = strspn( $x, " " );
    $blank = (string)substr( $x, 0, $len );
    $x = (string)substr( $x, $len );

    $len = strspn( $x, $valid );
    $rfc = (string)substr( $x, 0, $len );
    $x = (string)substr( $x, $len );

    if ( $rfc === "" ) {
        return "RFC $blank$x";
    }

    $url = wfMsg( "rfcurl" );
    $url = str_replace( "$1", $rfc, $url);
    $sk =& $this->mOptions->getSkin();
    $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
    # The URL comes from message text, so escape it before it goes into
    # the single-quoted attribute.
    $href = htmlspecialchars( $url, ENT_QUOTES );
    return "<a href='{$href}'{$la}>RFC {$rfc}</a>{$x}";
}
1403
# Transforms wikitext before it is saved.
#
# Stripped sections are taken out (without rendering them), pstPass2() is
# run on what is left, and the stripped sections are put back.
#
# $title and $options are stored on the parser for the duration;
# $user is handed to pstPass2(). When $clearState is true the parser
# state is reset first.
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true )
{
    $this->mOptions = $options;
    $this->mTitle = $title;
    if ( $clearState ) {
        # Must be a call; a bare "$this->clearState" is only a property read
        $this->clearState();
    }

    $stripState = false;
    $text = $this->strip( $text, $stripState, false );
    $text = $this->pstPass2( $text, $user );
    $text = $this->unstrip( $text, $stripState );
    return $text;
}
1418
# Second pass of the pre-save transform.
#
# - expands ~~~~ and ~~~ into a link to the user's page, labelled with
#   the nickname (or the user name if no nickname is set); the
#   four-tilde form also gets a timestamp
# - completes context links such as [[|name]] and [[name (context)|]]
# - runs the MAG_SUBST callback (wfReplaceSubstVar)
# - trims trailing whitespace and removes the MAG_END marker
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user )
{
    global $wgLang, $wgLocaltimezone;

    # Signatures
    #
    $n = $user->getName();
    $k = $user->getOption( "nickname" );
    if ( "" == $k ) { $k = $n; }
    if ( isset( $wgLocaltimezone ) ) {
        $oldtz = getenv( "TZ" ); putenv( "TZ=$wgLocaltimezone" );
    }
    /* Note: this is an ugly timezone hack for the European wikis */
    $d = $wgLang->timeanddate( date( "YmdHis" ), false ) .
        " (" . date( "T" ) . ")";
    if ( isset( $wgLocaltimezone ) ) putenv( "TZ=$oldtz" );

    # Plain string replacement: the tildes are a literal pattern, and
    # going through preg_replace would interpret "\1" or "$1" inside a
    # user name or nickname as a backreference.
    $sig = "[[" . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
    $text = str_replace( "~~~~", "$sig $d", $text );
    $text = str_replace( "~~~", $sig, $text );

    # Context links: [[|name]] and [[name (context)|]]
    #
    $tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
    $np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
    $namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
    $conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

    $p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
    $p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
    $p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
    $p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
                                            # [[ns:page (cont)|]]

    # The context is the parenthesised suffix of the current title, if any
    $context = "";
    $t = $this->mTitle->getText();
    if ( preg_match( $conpat, $t, $m ) ) {
        $context = $m[2];
    }
    $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text );
    $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text );
    $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text );

    if ( "" == $context ) {
        $text = preg_replace( $p2, "[[\\1]]", $text );
    } else {
        $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
    }

    # {{SUBST:xxx}} variables
    #
    $mw =& MagicWord::get( MAG_SUBST );
    $text = $mw->substituteCallback( $text, "wfReplaceSubstVar" );

    # Trim trailing whitespace
    # MAG_END (__END__) tag allows for trailing
    # whitespace to be deliberately included
    $text = rtrim( $text );
    $mw =& MagicWord::get( MAG_END );
    $mw->matchAndRemove( $text );

    return $text;
}
1482
1483
1484 }
1485
# Holds what a parse produces: the output text, the language and category
# links, and the "contains old magic" flag.
class ParserOutput
{
    var $mText;
    var $mLanguageLinks;
    var $mCategoryLinks;
    var $mContainsOldMagic;

    # Every argument is optional; the defaults describe an empty result.
    function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
        $containsOldMagic = false )
    {
        $this->mContainsOldMagic = $containsOldMagic;
        $this->mCategoryLinks = $categoryLinks;
        $this->mLanguageLinks = $languageLinks;
        $this->mText = $text;
    }

    # Accessors

    function getText()
    {
        return $this->mText;
    }

    function getLanguageLinks()
    {
        return $this->mLanguageLinks;
    }

    function getCategoryLinks()
    {
        return $this->mCategoryLinks;
    }

    function containsOldMagic()
    {
        return $this->mContainsOldMagic;
    }

    # Mutators; each one returns whatever wfSetVar() returns

    function setText( $text )
    {
        return wfSetVar( $this->mText, $text );
    }

    function setLanguageLinks( $ll )
    {
        return wfSetVar( $this->mLanguageLinks, $ll );
    }

    function setCategoryLinks( $cl )
    {
        return wfSetVar( $this->mCategoryLinks, $cl );
    }

    function setContainsOldMagic( $com )
    {
        return wfSetVar( $this->mContainsOldMagic, $com );
    }
}
1508
# The set of options a parse runs with. Part of it is copied from the
# site-wide $wg* settings and part from the preferences of a user; see
# initialiseFromUser().
class ParserOptions
{
    # All variables are private
    var $mUseTeX;                  # Use texvc to expand <math> tags
    var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
    var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
    var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
    var $mAllowExternalImages;     # Allow external images inline
    var $mSkin;                    # Reference to the preferred skin
    var $mDateFormat;              # Date format index
    var $mEditSection;             # Create "edit section" links
    var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
    var $mPrintable;               # Generate printable output
    var $mNumberHeadings;          # Automatically number headings
    var $mShowToc;                 # Show table of contents

    function getUseTeX() { return $this->mUseTeX; }
    function getUseCategoryMagic() { return $this->mUseCategoryMagic; }
    function getUseDynamicDates() { return $this->mUseDynamicDates; }
    function getInterwikiMagic() { return $this->mInterwikiMagic; }
    function getAllowExternalImages() { return $this->mAllowExternalImages; }
    function getSkin() { return $this->mSkin; }
    function getDateFormat() { return $this->mDateFormat; }
    function getEditSection() { return $this->mEditSection; }
    function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; }
    function getPrintable() { return $this->mPrintable; }
    function getNumberHeadings() { return $this->mNumberHeadings; }
    function getShowToc() { return $this->mShowToc; }

    function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); }
    function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); }
    function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); }
    function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); }
    function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); }
    function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); }
    function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); }
    function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); }
    function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); }
    function setPrintable( $x ) { return wfSetVar( $this->mPrintable, $x ); }
    function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); }
    function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); }

    # Builds a new ParserOptions initialised from the given user.
    /* static */ function newFromUser( &$user )
    {
        $popts = new ParserOptions;
        # initialiseFromUser() already takes its argument by reference,
        # so no "&" is needed (or, on newer PHP, allowed) at the call site.
        $popts->initialiseFromUser( $user );
        return $popts;
    }

    # Fills in every option: the site-wide ones from the $wg* globals, the
    # rest from the user's skin and preferences. A false/empty $userInput
    # is replaced by a fresh User object. Printable output starts off
    # disabled.
    function initialiseFromUser( &$userInput )
    {
        global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

        if ( !$userInput ) {
            $user = new User;
        } else {
            $user =& $userInput;
        }

        $this->mUseTeX = $wgUseTeX;
        $this->mUseCategoryMagic = $wgUseCategoryMagic;
        $this->mUseDynamicDates = $wgUseDynamicDates;
        $this->mInterwikiMagic = $wgInterwikiMagic;
        $this->mAllowExternalImages = $wgAllowExternalImages;
        $this->mSkin =& $user->getSkin();
        $this->mDateFormat = $user->getOption( "date" );
        $this->mEditSection = $user->getOption( "editsection" );
        $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" );
        $this->mPrintable = false;
        $this->mNumberHeadings = $user->getOption( "numberheadings" );
        $this->mShowToc = $user->getOption( "showtoc" );
    }
}
1584
# Regex callbacks, used in Parser::replaceVariables
1586
# Callback for the MAG_MSG magic word; $matches[1] is the message name.
#
# The message text is run through removeHTMLtags() to get rid of the
# dangerous stuff: replaceVariables is called after removeHTMLtags, and
# message text can come from any user. Internal links in the message are
# expanded while the link cache is suspended; afterwards the message's
# own page in the MediaWiki namespace is added to the link cache.
function wfReplaceMsgVar( $matches ) {
    global $wgCurOut, $wgLinkCache;

    $msgName = $matches[1];
    $cleaned = $wgCurOut->removeHTMLtags( wfMsg( $msgName ) );

    $wgLinkCache->suspend();
    $expanded = $wgCurOut->replaceInternalLinks( $cleaned );
    $wgLinkCache->resume();

    $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
    $wgLinkCache->addLinkObj( $msgTitle );

    return $expanded;
}
1599
# Callback for the MAG_MSGNW magic word; $matches[1] is the message name.
#
# Acts as an effective <nowiki></nowiki>: the message text is passed
# through wfEscapeWikiText(). It cannot be a real <nowiki> because this
# runs after nowiki sections have been processed. The message's page in
# the MediaWiki namespace is added to the link cache.
function wfReplaceMsgnwVar( $matches ) {
    global $wgLinkCache;

    $msgName = $matches[1];
    $escaped = wfEscapeWikiText( wfMsg( $msgName ) );

    $msgTitle = Title::makeTitle( NS_MEDIAWIKI, $msgName );
    $wgLinkCache->addLinkObj( $msgTitle );

    return $escaped;
}
1608
1609
1610
1611 ?>