add [;] to french space character list
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 // require_once('Tokenizer.php');
4
5 # PHP Parser
6 #
7 # Processes wiki markup
8 #
9 # There are two main entry points into the Parser class: parse() and preSaveTransform().
10 # The parse() function produces HTML output, preSaveTransform() produces altered wiki markup.
11 #
12 # Globals used:
13 # objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
14 #
15 # NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
16 #
17 # settings: $wgUseTex*, $wgUseCategoryMagic*, $wgUseDynamicDates*, $wgInterwikiMagic*,
18 # $wgNamespacesWithSubpages, $wgLanguageCode, $wgAllowExternalImages*,
19 # $wgLocaltimezone
20 #
21 # * only within ParserOptions
22 #
23 #
24 #----------------------------------------
25 # Variable substitution O(N^2) attack
26 #-----------------------------------------
27 # Without countermeasures, it would be possible to attack the parser by saving a page
28 # filled with a large number of inclusions of large pages. The size of the generated
29 # page would be proportional to the square of the input size. Hence, we limit the number
30 # of inclusions of any given page, thus bringing any attack back to O(N).
31 #
32
# Upper bound on how many times any single page may be included
define( 'MAX_INCLUDE_REPEAT', 5 );

# Output modes stored in $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG', 3 );

# Pseudo tag name understood by extractTags(): when passed as the tag,
# HTML comments are stripped instead of a regular <XML>-style tag.
# It must never collide with anything usable in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Common prefix of the unique strip markers, shared by several functions
define( 'UNIQ_PREFIX', 'NaodW29' );
48
49 class Parser
50 {
51 # Persistent:
52 var $mTagHooks;
53
54 # Cleared with clearState():
55 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
56 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
57
58 # Temporary:
59 var $mOptions, $mTitle, $mOutputType;
60
function Parser() {
	# Reset everything that clearState() owns first ...
	$this->clearState();
	# ... then set up the tag hook table, which clearState() never touches
	$this->mTagHooks = array();
}
65
function clearState() {
	# Fresh output container
	$this->mOutput = new ParserOutput();

	# Scalar state
	$this->mAutonumber = 0;
	$this->mLastSection = "";
	$this->mVariables = false;
	$this->mDTopen = false;
	$this->mInPre = false;

	# Collections
	$this->mStripState = array();
	$this->mIncludeCount = array();
	$this->mArgStack = array();
}
77
78 # First pass--just handle <nowiki> sections, pass the rest off
79 # to internalParse() which does all the real work.
80 #
81 # Returns a ParserOutput
82 #
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	# Convert wiki markup to HTML.
	#
	# $text       wiki markup to convert
	# $title      title object, stored by reference in $this->mTitle
	# $options    parser options, stored in $this->mOptions
	# $linestart  passed on to internalParse() and doBlockLevels()
	# $clearState when true, clearState() is called before parsing
	#
	# Returns $this->mOutput with the generated text set on it.
	global $wgUseTidy;
	$fname = "Parser::parse";
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );
	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces: a space in front of ? : ; ! or the UTF-8
			# bytes \302\273 (the guillemet character U+00BB) becomes &nbsp;,
			# but only if there is something before the space
			'/(.) (\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;\\2',
			# french spaces: a space after the UTF-8 bytes \302\253
			# (the guillemet character U+00AB) becomes &nbsp;
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands; note that we probably ought to be
			# more careful about named entities.
			# The hexadecimal class is A-F/a-f only: the former "A-f" range
			# also matched Z, [, ], ^, _ and friends.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	} else {
		$fixtags = array(
			# french spaces, same character list as the non-Tidy branch
			# (including the semicolon)
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces after the UTF-8 bytes \302\253
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
		$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );
	}
	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	# <nowiki> content goes back in after block level processing
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	if($wgUseTidy) {
		$text = $this->tidy($text);
	}
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
138
/* static */ function getRandomString() {
	# Two independent draws from mt_rand(), each rendered in lowercase hex
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
142
143 # Replaces all occurrences of <$tag>content</$tag> in the text
144 # with a random marker and returns the new text. the output parameter
145 # $content will be an associative array filled with data on the form
146 # $unique_marker => content.
147
148 # If $content is already set, the additional entries will be appended
149
150 # If $tag is set to STRIP_COMMENTS, the function will extract
151 # <!-- HTML comments -->
152
/* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){
	# Replace every <$tag>...</$tag> section of $text by a unique marker.
	#
	# $tag          tag name, or STRIP_COMMENTS to extract HTML comments
	# $text         text to scan
	# $content      (in/out) map of marker => enclosed text; new entries
	#               are appended, an empty value is turned into an array
	# $uniq_prefix  string every generated marker starts with
	#
	# Returns $text with the sections replaced by their markers.
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}
	$n = 1;
	$stripped = '';

	# The delimiters do not change while scanning, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = '/<!--/i';
		$closePattern = '/-->/i';
	} else {
		# Quote the name so that regex metacharacters in it are literal
		$quotedTag = preg_quote( $tag, '/' );
		$openPattern = "/<\\s*$quotedTag\\s*>/i";
		$closePattern = "/<\\/\\s*$quotedTag\\s*>/i";
	}

	while ( '' != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No opening delimiter left, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# Without a closing delimiter the section runs to the end of
			# the text and nothing remains to be scanned.
			$text = ( count( $q ) < 2 ) ? '' : $q[1];
		}
	}
	return $stripped;
}
184
185 # Strips and renders <nowiki>, <pre>, <math>, <hiero>
186 # If $render is set, performs necessary rendering operations on plugins
187 # Returns the text, and fills an array with data needed in unstrip()
188 # If the $state is already a valid strip state, it adds to the state
189
190 # When $stripcomments is set, HTML comments <!-- like this -->
191 # will be stripped in addition to other tags. This is important
192 # for section editing, where these comments cause confusion when
193 # counting the sections in the wikisource
function strip( $text, &$state, $stripcomments = false ) {
	# Pull <nowiki>, <math>, <pre>, optionally HTML comments, and all
	# registered extension tags out of $text, replacing them by markers.
	#
	# $text           text to process
	# $state          (in/out) strip state; an empty value is replaced by a
	#                 fresh state, otherwise the new entries are merged in
	# $stripcomments  also strip HTML comments when true
	#
	# When the output type is OT_HTML the stored content is rendered,
	# otherwise it is stored wrapped in its original tags.
	# Returns the text with markers in place of the stripped sections.
	$render = ($this->mOutputType == OT_HTML);
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Replace any instances of the placeholders
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ){
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = "<nowiki>$content</nowiki>";
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# TeX disabled: show the source, with a proper closing tag
				$math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
			}
		} else {
			$math_content[$marker] = "<math>$content</math>";
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = "<pre>$content</pre>";
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = "<!--$content-->";
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	$stripped = array(
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	) + $ext_content;

	# Merge state with the pre-existing state, if there is one
	if ( $state ) {
		foreach ( $stripped as $type => $items ) {
			if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				# A state may lack a group (for instance one created by
				# insertStripItem(), or an extension tag seen for the
				# first time); adopt the new group instead of losing it.
				$state[$type] = $items;
			}
		}
	} else {
		$state = $stripped;
	}
	return $text;
}
284
285 # always call unstripNoWiki() after this one
function unstrip( $text, &$state ) {
	# Put back every stripped section except the 'nowiki' group, which is
	# left for unstripNoWiki().
	# Groups and their entries are visited from last to first; expanding
	# in the other order would corrupt nested tags.
	$group = end( $state );
	while ( $group !== false ) {
		if ( key( $state ) != 'nowiki' ) {
			$replacement = end( $group );
			while ( $replacement !== false ) {
				$text = str_replace( key( $group ), $replacement, $text );
				$replacement = prev( $group );
			}
		}
		$group = prev( $state );
	}

	return $text;
}
299 # always call this after unstrip() to preserve the order
function unstripNoWiki( $text, &$state ) {
	# Put the 'nowiki' group back, last entry first; expanding in the
	# other order would corrupt nested tags.
	$replacement = end( $state['nowiki'] );
	while ( $replacement !== false ) {
		$text = str_replace( key( $state['nowiki'] ), $replacement, $text );
		$replacement = prev( $state['nowiki'] );
	}

	return $text;
}
308
309 # Add an item to the strip state
310 # Returns the unique tag which must be inserted into the stripped text
311 # The tag will be replaced with the original text in unstrip()
312
function insertStripItem( $text, &$state ) {
	# Store $text in the 'item' group of the strip state.
	#
	# $text   text to put back later
	# $state  (in/out) strip state; an empty value is replaced by a fresh
	#         state first
	#
	# Returns the marker that has to be inserted into the stripped text.
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same groups as a state created by strip(), which reads
		# $state['comment'] when it merges into an existing state
		$state = array(
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
325
326 # categoryMagic
327 # generate a list of subcategories and pages for a category
328 # depending on wfMsg("usenewcategorypage") it either calls the new
329 # or the old code. The new code will not work properly for some
330 # languages due to sorting issues, so they might want to turn it
331 # off.
function categoryMagic() {
	# The message 'usenewcategorypage' selects the implementation: a
	# leading '0' picks the old code, anything else the new code.
	$msg = wfMsg('usenewcategorypage');
	$useOldCode = ( '0' == @$msg[0] );
	return $useOldCode ? $this->oldCategoryMagic() : $this->newCategoryMagic();
}
341
342 # This method generates the list of subcategories and pages for a category
function oldCategoryMagic () {
	# Build the comma separated lists of subcategories and pages for the
	# category page in $this->mTitle.
	# Returns nothing when category magic is disabled, an empty string
	# when the title is not in the category namespace, the HTML otherwise.
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) {
		return ;
	}

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) {
		return "" ;
	}

	$html = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$pageLinks = array() ;
	$subcatLinks = array() ;
	$rows = array () ;
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	# Read the complete result before any link is generated
	while ( $row = wfFetchObject ( $res ) ) {
		$rows[] = $row ;
	}

	# One link per page that is in this category
	foreach ( $rows as $row ) {
		$t = $wgLang->getNsText ( $row->cur_namespace ) ;
		if ( $t != "" ) {
			$t .= ":" ;
		}
		$t .= $row->cur_title ;

		if ( $row->cur_namespace == $cns ) {
			# Subcategory
			array_push ( $subcatLinks , $sk->makeLink ( $t ) ) ;
		} else {
			# Page in this category
			array_push ( $pageLinks , $sk->makeLink ( $t ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	# Subcategories section
	if ( count ( $subcatLinks ) > 0 ) {
		$html .= '<h2>'.wfMsg('subcategories')."</h2>\n" ;
		$html .= implode ( ', ' , $subcatLinks ) ;
	}

	# Pages section
	if ( count ( $pageLinks ) > 0 ) {
		$ti = $this->mTitle->getText() ;
		$h = wfMsg( 'category_header', $ti );
		$html .= "<h2>{$h}</h2>\n" ;
		$html .= implode ( ', ' , $pageLinks ) ;
	}

	return $html ;
}
398
399
400
function newCategoryMagic () {
	# Build the lists of subcategories and pages, grouped by first
	# character, for the category page in $this->mTitle.
	# Returns nothing when category magic is disabled, an empty string
	# when the title is not in the category namespace, the HTML otherwise.
	global $wgLang , $wgUser ;
	if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all

	$cns = Namespace::getCategory() ;
	if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page

	$r = "<br style=\"clear:both;\"/>\n";

	$sk =& $wgUser->getSkin() ;

	$articles = array() ;
	$articles_start_char = array();
	$children = array() ;
	$children_start_char = array();
	# NOTE(review): the value is not used below; the call is kept in case
	# getArticleID() has side effects -- confirm before removing
	$id = $this->mTitle->getArticleID() ;

	# FIXME: add limits
	$t = wfStrencode( $this->mTitle->getDBKey() );
	$sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM
		cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY
		cl_sortkey" ;
	$res = wfQuery ( $sql, DB_READ ) ;
	while ( $x = wfFetchObject ( $res ) )
	{
		$t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ;
		if ( $t != '' ) $t .= ':' ;
		$t .= $x->cur_title ;

		if ( $x->cur_namespace == $cns ) {
			$ctitle = str_replace( '_',' ',$x->cur_title );
			array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory

			// If there's a link from Category:A to Category:B, the sortkey of the resulting
			// entry in the categorylinks table is Category:A, not A, which it SHOULD be.
			// Workaround: If sortkey == "Category:".$title, then use $title for sorting,
			// else use sortkey...
			if ( ($ns.":".$ctitle) == $x->cl_sortkey ) {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) );
			} else {
				array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
			}
		} else {
			array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category
			array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ;
		}
	}
	wfFreeResult ( $res ) ;

	$ti = $this->mTitle->getText() ;

	# Don't show subcategories section if there are none.
	if ( count ( $children ) > 0 )
	{
		$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n"
			. wfMsg( 'subcategorycount', count( $children ) );
		$r .= $this->categoryListHtml( $children, $children_start_char );
	}

	# The articles heading and count are always shown, the list only
	# when there is at least one article.
	$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n" .
		wfMsg( 'categoryarticlecount', count( $articles ) );
	$r .= $this->categoryListHtml( $articles, $articles_start_char );

	return $r ;
}

# Render a list of category members grouped under <h3> start characters.
# $items holds the ready-made links, $start_char the start character of the
# item with the same index. More than six items are laid out as a table of
# three columns, fewer as a single list; no items yield an empty string.
/* private */ function categoryListHtml( $items, $start_char ) {
	$total = count( $items );
	if ( $total == 0 ) {
		return '';
	}

	$r = '';
	if ( $total > 6 ) {
		// divide list into three chunks
		$chunk = (int) ( $total / 3 );

		$r .= '<table width="100%"><tr valign="top">';

		// loop through the chunks
		for ( $startChunk = 0, $endChunk = $chunk, $chunkIndex = 0;
			$chunkIndex < 3;
			$chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1 )
		{
			$r .= '<td><ul>';
			for ( $index = $startChunk;
				$index < $endChunk && $index < $total;
				$index++ )
			{
				// New heading at the beginning of a chunk or when the
				// starting letter changes. The chunk test comes first so
				// that index - 1 is never read for the very first item.
				if ( ( $index == $startChunk )
					|| ( $start_char[$index] != $start_char[$index - 1] ) )
				{
					$r .= "</ul><h3>{$start_char[$index]}</h3>\n<ul>";
				}

				$r .= "<li>{$items[$index]}</li>";
			}
			$r .= '</ul></td>';
		}
		$r .= '</tr></table>';
	} else {
		// short list: one column
		$r .= "<h3>{$start_char[0]}</h3>\n";
		$r .= '<ul><li>' . $items[0] . '</li>';
		for ( $index = 1; $index < $total; $index++ )
		{
			if ( $start_char[$index] != $start_char[$index - 1] )
			{
				$r .= "</ul><h3>{$start_char[$index]}</h3>\n<ul>";
			}

			$r .= "<li>{$items[$index]}</li>";
		}
		$r .= '</ul>';
	}
	return $r;
}
576
577 # Return allowed HTML attributes
function getHTMLattrs () {
	# Whitelist of attribute names -- no scripting, etc.
	# The groups are concatenated in order; 'width' appears in two of them.
	$general = array( 'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor' );
	$rules_and_quotes = array(
		'clear', /* BR */
		'noshade', /* HR */
		'cite' /* BLOCKQUOTE, Q */
	);
	$font = array( 'size', 'face', 'color' );
	# For various lists, mostly deprecated but safe
	$lists = array( 'type', 'start', 'value', 'compact' );
	$tables = array(
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan'
	);
	# For CSS
	$css = array( 'id', 'class', 'name', 'style' );

	return array_merge( $general, $rules_and_quotes, $font, $lists, $tables, $css );
}
593
594 # Remove non approved attributes and javascript in css
function fixTagAttributes ( $t ) {
	# Reduce an attribute string to the attributes listed by
	# getHTMLattrs(); the whole string is dropped when its style looks
	# like it carries script. Returns the trimmed result.
	if ( trim ( $t ) == '' ) {
		return '' ; # Saves runtime ;-)
	}

	# Keep this variable name: the evaluated replacement string below
	# refers to $htmlattrs.
	$htmlattrs = $this->getHTMLattrs() ;

	# Strip non-approved attributes from the tag
	$attrPattern = '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e';
	$attrFilter = "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')";
	$t = preg_replace( $attrPattern, $attrFilter, $t );

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# if anything offensive is found, all attributes of the HTML tag are dropped
	$offensiveStyle = '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is';
	if ( preg_match( $offensiveStyle, wfMungeToUtf8( $t ) ) ) {
		$t = '';
	}

	return trim ( $t ) ;
}
616
617 # interface with html tidy, used if $wgUseTidy = true
function tidy ( $text ) {
	# Run $text through the external tidy program ($wgTidyBin).
	# Used by parse() when $wgUseTidy is true.
	#
	# Returns the program's output; when it produces nothing for a
	# non-empty input, the input is returned with a warning comment.
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	$cleansource = '';

	# Work on a copy: appending to the global would add one more
	# encoding switch for every call made during the request.
	$opts = $wgTidyOpts;
	switch(strtoupper($wgOutputEncoding)) {
		case 'ISO-8859-1':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
			break;
		case 'UTF-8':
			$opts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
			break;
		default:
			$opts .= ' -raw';
	}

	# The program is handed a complete document wrapped around the fragment
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$process = proc_open("$wgTidyBin -config $wgTidyConf $opts", $descriptorspec, $pipes);
	if (is_resource($process)) {
		fwrite($pipes[0], $wrappedtext);
		fclose($pipes[0]);
		while (!feof($pipes[1])) {
			$cleansource .= fgets($pipes[1], 1024);
		}
		fclose($pipes[1]);
		proc_close($process);
	}

	wfProfileOut( $fname );

	if( $cleansource == '' && $text != '') {
		wfDebug( "Tidy error detected!\n" );
		return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
	} else {
		return $cleansource;
	}
}
664
665 # parse the wiki syntax used to render tables
function doTableStuff ( $t ) {
	# Convert the wiki table syntax to HTML tables, line by line.
	#
	#   {|  opens a table      |}  closes it      |-  starts a new row
	#   |   data cell(s)       !   header cell(s) |+  caption
	#
	# Four parallel stacks hold one entry per currently open table.
	# Tables still open at the end of the text are closed automatically.
	# Returns the converted text.
	$t = explode ( "\n" , $t ) ;
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( '{|' == substr ( $x , 0 , 2 ) )
		{
			$t[$k] = "\n<table " . $this->fixTagAttributes ( substr ( $x , 3 ) ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Not inside a table: leave the line alone
		else if ( '|}' == substr ( $x , 0 , 2 ) )
		{
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |---------------
		{
			$x = substr ( $x , 1 ) ;
			while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			# What follows the dashes becomes the attributes of the next <tr>
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption
		{
			if ( '|+' == substr ( $x , 0 , 2 ) )
			{
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			# In a header line "!!" separates cells just like "||"
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Cells need a row; open one if none is open
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = "<tr {$tra}>\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = "</{$l}>" . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;
				# "attributes|content" or just "content"
				$y = explode ( '|' , $theline , 2 ) ;
				if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# Close the cell with the tag that opened it (td, th or caption)
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = "</{$l}>" ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	return $t ;
}
764
765 # Parses the text and adds the result to the strip state
766 # Returns the strip tag
function stripParse( $text, $newline, $args )
{
	# Strip, parse (not as the main text) and park the result in the
	# strip state; the caller gets $newline followed by the marker.
	$stripped = $this->strip( $text, $this->mStripState );
	$parsed = $this->internalParse( $stripped, (bool)$newline, $args, false );
	$marker = $this->insertStripItem( $parsed, $this->mStripState );
	return $newline . $marker;
}
773
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	# Run the conversion passes over already stripped text, in a fixed
	# order, and return the result.
	# $args is handed to replaceVariables(), $isMain to formatHeadings().
	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	$text = $this->replaceVariables( $this->removeHTMLtags( $text ), $args );

	# Four or more dashes at the start of a line make a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doHeadings( $text );
	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}
	$text = $this->doAllQuotes( $text );
	// $text = $this->doExponent( $text );

	# External links have to be replaced before internal ones
	$text = $this->replaceExternalLinks( $text );
	# The internal link pass runs twice
	for ( $pass = 0; $pass < 2; $pass++ ) {
		$text = $this->replaceInternalLinks ( $text );
	}
	//$text = $this->doTokenizedParser ( $text );
	$text = $this->doTableStuff ( $text ) ;
	$text = $this->magicISBN( $text );
	$text = $this->magicRFC( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$sk =& $this->mOptions->getSkin();
	$text = $sk->transformContent( $text );

	# The category listing is appended once per parser object only
	if ( !isset ( $this->categoryMagicDone ) ) {
		$text .= $this->categoryMagic () ;
		$this->categoryMagicDone = true ;
	}

	wfProfileOut( $fname );
	return $text;
}
809
810 # Parse ^^ tokens and return html
/* private */ function doExponent ( $text )
{
	# Text enclosed in ^^ ... ^^ becomes small superscript
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	$pattern = '/\^\^(.*)\^\^/';
	$replacement = '<small><sup>\\1</sup></small>';
	$result = preg_replace( $pattern, $replacement, $text );
	wfProfileOut( $fname);
	return $result;
}
819
820 # Parse headers and return html
/* private */ function doHeadings( $text ) {
	# Turn "== text ==" style lines into <h1>..<h6> elements.
	# The longest delimiter is tried first, so that "===" is not taken
	# for "==" with an extra "=" in the text.
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	for ( $level = 6; $level > 0; $level-- ) {
		$h = str_repeat( '=', $level );
		$pattern = "/^{$h}(.+){$h}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
832
/* private */ function doAllQuotes( $text ) {
	# Apply doQuotes() to every line of the text separately
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$lines = explode( "\n", $text );
	$outtext = '';
	$lineCount = count( $lines );
	for ( $i = 0; $i < $lineCount; $i++ ) {
		$outtext .= $this->doQuotes ( '', $lines[$i], '' ) . "\n";
	}
	# Drop the newline added after the last line
	$outtext = substr($outtext, 0,-1);
	wfProfileOut( $fname );
	return $outtext;
}
845
/* private */ function doQuotes( $pre, $text, $mode ) {
	# Recursive worker for the '' (em) and ''' (strong) quote markup.
	#
	# $pre   text collected so far that still waits for its wrapper
	# $text  text that remains to be scanned
	# $mode  '' , 'em', 'strong', 'emstrong', 'strongem' or 'both':
	#        which markup is open at this point
	if ( !preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) {
		# No quote sequence left: close whatever is open
		$text_strong = ($text == '') ? '' : "<strong>{$text}</strong>";
		$text_em = ($text == '') ? '' : "<em>{$text}</em>";
		switch ( $mode ) {
			case '':
				return $pre . $text;
			case 'em':
				return $pre . $text_em;
			case 'strong':
				return $pre . $text_strong;
			case 'strongem':
				return (($pre == '') && ($text == '')) ? '' : "<strong>{$pre}{$text_em}</strong>";
			default:
				return (($pre == '') && ($text == '')) ? '' : "<em>{$pre}{$text_strong}</em>";
		}
	}

	$m1_strong = ($m[1] == "") ? "" : "<strong>{$m[1]}</strong>";
	$m1_em = ($m[1] == "") ? "" : "<em>{$m[1]}</em>";

	if ( substr ($m[2], 0, 1) == '\'' ) {
		# Three quotes: strong delimiter
		$m[2] = substr ($m[2], 1);
		switch ( $mode ) {
			case 'em':
				return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' );
			case 'strong':
				return $m1_strong . $this->doQuotes ( '', $m[2], '' );
			case 'emstrong':
			case 'both':
				return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' );
			case 'strongem':
				return "<strong>{$pre}{$m1_em}</strong>" . $this->doQuotes ( '', $m[2], 'em' );
			default:
				return $m[1] . $this->doQuotes ( '', $m[2], 'strong' );
		}
	}

	# Two quotes: em delimiter
	switch ( $mode ) {
		case 'strong':
			return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' );
		case 'em':
			return $m1_em . $this->doQuotes ( '', $m[2], '' );
		case 'emstrong':
			return "<em>{$pre}{$m1_strong}</em>" . $this->doQuotes ( '', $m[2], 'strong' );
		case 'strongem':
		case 'both':
			return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' );
		default:
			return $m[1] . $this->doQuotes ( '', $m[2], 'em' );
	}
}
892
893 # Note: we have to do external links before the internal ones,
894 # and otherwise take great care in the order of things here, so
895 # that we don't end up interpreting some URLs twice.
896
/* private */ function replaceExternalLinks( $text ) {
	# Run subReplaceExternalLinks() once per supported protocol, in this
	# order. The flag is the $autonumber argument: true for http and
	# https only.
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );
	$protocols = array(
		'http' => true,
		'https' => true,
		'ftp' => false,
		'irc' => false,
		'gopher' => false,
		'news' => false,
		'mailto' => false
	);
	foreach ( $protocols as $protocol => $autonumber ) {
		$text = $this->subReplaceExternalLinks( $text, $protocol, $autonumber );
	}
	wfProfileOut( $fname );
	return $text;
}
910
/* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) {
	# Replace the external links of one protocol in $s.
	#
	# $s           text to process
	# $protocol    URL scheme without the colon, e.g. 'http'
	# $autonumber  when true, bracketed links without a description are
	#              shown as [1], [2], ... and, if the parser options
	#              allow external images, bare image URLs become images
	#
	# Bare URLs are handled first, with the protocol temporarily swapped
	# for a unique string so that generated links are not matched again;
	# bracketed links [url] and [url text] follow. Returns the new text.
	$unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3';
	$uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF";

	# this is the list of separators that should be ignored if they
	# are the last character of an URL but that should be included
	# if they occur within the URL, e.g. "go to www.foo.com, where .."
	# in this case, the last comma should not become part of the URL,
	# but in "www.foo.com/123,2342,32.htm" it should.
	$sep = ",;\.:";
	$fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF';
	$images = 'gif|png|jpg|jpeg';

	# PLEASE NOTE: The curly braces { } are not part of the regex,
	# they are interpreted as part of the string (used to tell PHP
	# that the content of the string should be inserted there).
	$e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." .
		"((?i){$images})([^{$uc}]|$)/";

	$e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/";
	$sk =& $this->mOptions->getSkin();

	if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls
		$s = preg_replace( $e1, '\\1' . $sk->makeImage( "{$unique}:\\3" .
			'/\\4.\\5', '\\4.\\5' ) . '\\6', $s );
	}
	$s = preg_replace( $e2, '\\1' . "<a href=\"{$unique}:\\3\"" .
		$sk->getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML(
		"{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) .
		'</a>\\5', $s );
	$s = str_replace( $unique, $protocol, $s );

	# Bracketed links: split at "[protocol:"; the leading blank makes
	# sure the first piece exists even if the text starts with a link
	$a = explode( "[{$protocol}:", " " . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	$e1 = "/^([{$uc}"."{$sep}]+)](.*)\$/sD";
	$e2 = "/^([{$uc}"."{$sep}]+)\\s+([^\\]]+)](.*)\$/sD";

	foreach ( $a as $line ) {
		if ( preg_match( $e1, $line, $m ) ) {
			# [url]
			$link = "{$protocol}:{$m[1]}";
			$trail = $m[2];
			if ( $autonumber ) { $text = "[" . ++$this->mAutonumber . "]"; }
			else { $text = wfEscapeHTML( $link ); }
		} else if ( preg_match( $e2, $line, $m ) ) {
			# [url text]
			$link = "{$protocol}:{$m[1]}";
			$text = $m[2];
			$trail = $m[3];
		} else {
			# Not a link after all, restore what explode() removed
			$s .= "[{$protocol}:" . $line;
			continue;
		}
		# The pattern below is delimited by "!", so that is the delimiter
		# preg_quote() has to escape in the link text.
		if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "!" ) . "/?$!", $link ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			$paren = "<span class='urlexpansion'> (<i>" . htmlspecialchars ( $link ) . "</i>)</span>";
		}
		$la = $sk->getExternalLinkAttributes( $link, $text );
		$s .= "<a href='{$link}'{$la}>{$text}</a>{$paren}{$trail}";

	}
	return $s;
}
976
977
# Replace [[internal link]] markup in $s with rendered HTML.
#
# The text is split on '[[' and every following piece is matched against
# "target|alternate text]]trail". Pieces that do not have that form, or
# whose target Title::newFromText() rejects, are written back unchanged
# with their '[[' restored. Depending on the target, a piece becomes an
# interlanguage link (collected in mOutput->mLanguageLinks), an image,
# a category link (collected in mOutput->mCategoryLinks), a self link,
# a media link, a special-page link or an ordinary page link.
#
# $s      - wikitext to process
# returns - the text with internal links replaced
/* private */ function replaceInternalLinks( $s ) {
	global $wgLang, $wgLinkCache;
	global $wgNamespacesWithSubpages, $wgLanguageCode;
	static $fname = 'Parser::replaceInternalLinks' ;
	wfProfileIn( $fname );

	wfProfileIn( $fname.'-setup' );
	static $tc = FALSE;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) { $tc = Title::legalChars() . '#%'; }
	$sk =& $this->mOptions->getSkin();

	# A space is prepended so that text starting with '[[' still yields
	# a (then empty) leading piece; it is removed again by substr().
	$a = explode( '[[', ' ' . $s );
	$s = array_shift( $a );
	$s = substr( $s, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	static $e1 = FALSE;
	if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; }
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	$useLinkPrefixExtension = $wgLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them
	static $image = FALSE;
	static $special = FALSE;
	static $media = FALSE;
	static $category = FALSE;
	if ( !$image ) { $image = Namespace::getImage(); }
	if ( !$special ) { $special = Namespace::getSpecial(); }
	if ( !$media ) { $media = Namespace::getMedia(); }
	if ( !$category ) { $category = Namespace::getCategory(); }

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( $useLinkPrefixExtension ) {
		# The word directly in front of the very first link is split off
		# here, before the loop starts appending to $s.
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
			$s = $m[1];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	wfProfileOut( $fname.'-setup' );

	foreach ( $a as $line ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			# Pull the word that ends the output so far back out of $s;
			# it is handed to the skin as the link prefix.
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix='';
			}
			# first link
			if($first_prefix) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# fix up urlencoded title texts
			if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]);
			$trail = $m[3];
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line ;
			continue;
		}

		/* Valid link forms:
		Foobar -- normal
		:Foobar -- override special treatment of prefix (images, language links)
		/Foobar -- convert to CurrentPage/Foobar
		/Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text
		*/
		$c = substr($m[1],0,1);
		$noforce = ($c != ':');
		if( $c == '/' ) { # subpage
			if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown
				$m[1]=substr($m[1],1,strlen($m[1])-2);
				$noslash=$m[1];
			} else {
				$noslash=substr($m[1],1);
			}
			if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here
				$link = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
				if( '' == $text ) {
					$text= $m[1];
				} # this might be changed for ugliness reasons
			} else {
				$link = $noslash; # no subpage allowed, use standard link
			}
		} elseif( $noforce ) { # no subpage
			$link = $m[1];
		} else {
			$link = substr( $m[1], 1 );
		}
		$wasblank = ( '' == $text );
		if( $wasblank )
			$text = $link;

		$nt = Title::newFromText( $link );
		if( !$nt ) {
			# Not a usable title; leave the markup as it was
			$s .= $prefix . '[[' . $line;
			continue;
		}
		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();
		if( $noforce ) {
			if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) {
				# Interlanguage link: recorded on the output object, not rendered inline
				array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() );
				$tmp = $prefix . $trail ;
				$s .= (trim($tmp) == '')? '': $tmp;
				continue;
			}
			if ( $ns == $image ) {
				$s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}
			if ( $ns == $category ) {
				$t = $nt->getText() ;
				$nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix );
				$wgLinkCache->resume();

				# Without explicit text after '|', the current page title is the sort key
				$sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text;
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $t ;
				$s .= $prefix . $trail ;
				continue;
			}
		}
		# strpos() returns 0 when '#' is the first character of $link, and
		# 0 == FALSE is true, so the "no fragment" test has to be strict.
		if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) &&
			( strpos( $link, '#' ) === false ) ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if( $ns == $media ) {
			$s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		} elseif( $ns == $special ) {
			$s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $s;
}
1140
1141 # Some functions here used by doBlockLevels()
1142 #
# Close the currently open paragraph-level section, if there is one.
#
# Resets mLastSection and mInPre, and returns the closing tag for the
# section that was open (followed by a newline), or '' if none was open.
/* private */ function closeParagraph() {
	$openSection = $this->mLastSection;

	$this->mLastSection = '';
	$this->mInPre = false;

	return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
}
# getCommon() returns the length of the longest common prefix
# of both arguments, i.e. the number of leading characters they share.
1154 #
# Count how many leading characters $st1 and $st2 have in common.
#
# $st1, $st2 - strings to compare
# returns    - length of the shared prefix (0 if either string is empty)
#
# Characters are read with [] rather than {}: the curly-brace offset
# syntax is no longer accepted by current PHP versions.
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] !== $st2[$i] ) { break; }
	}
	return $i;
}
1165 # These next three functions open, continue, and close the list
1166 # element appropriate to the prefix character passed into them.
1167 #
# Open a new list for the given prefix character.
#
# Any open paragraph is closed first. '*' opens <ul><li>, '#' opens
# <ol><li>, ':' opens <dl><dd> and ';' opens <dl><dt> (and sets mDTopen).
#
# $char   - one prefix character
# returns - HTML to append to the output
/* private */ function openList( $char )
{
	$result = $this->closeParagraph();

	if ( '*' == $char ) { $result .= '<ul><li>'; }
	else if ( '#' == $char ) { $result .= '<ol><li>'; }
	else if ( ':' == $char ) { $result .= '<dl><dd>'; }
	else if ( ';' == $char ) {
		$result .= '<dl><dt>';
		$this->mDTopen = true;
	}
	# Append rather than assign: closeParagraph() has already reset the
	# section state, so its closing tag must not be thrown away here.
	else { $result .= '<!-- ERR 1 -->'; }

	return $result;
}
1183
# Produce the HTML that ends the current list item and starts the next
# one for the given prefix character.
#
# For definition lists (':' and ';') the closing tag depends on whether
# a <dt> is currently open (mDTopen), and mDTopen is updated to reflect
# the kind of item that was just opened.
#
# $char   - one prefix character
# returns - HTML fragment, or an error comment for an unknown character
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '</li><li>';
	}
	if ( ':' != $char && ';' != $char ) {
		return '<!-- ERR 2 -->';
	}

	$closeTag = $this->mDTopen ? '</dt>' : "</dd>";
	$startsTerm = ( ';' == $char );
	$this->mDTopen = $startsTerm;

	return $closeTag . ( $startsTerm ? '<dt>' : '<dd>' );
}
1199
# Produce the HTML that closes the list belonging to the given prefix
# character, followed by a newline.
#
# Only '*', '#' and ':' are recognised; anything else yields an error
# comment (without a trailing newline). For ':' the closing tag depends
# on mDTopen, which is cleared when a <dt> was still open.
#
# $char   - one prefix character
# returns - HTML fragment
/* private */function closeList( $char ) {
	if ( '*' == $char ) {
		return '</li></ul>' . "\n";
	}
	if ( '#' == $char ) {
		return '</li></ol>' . "\n";
	}
	if ( ':' != $char ) {
		return '<!-- ERR 3 -->';
	}
	if ( !$this->mDTopen ) {
		return '</dd></dl>' . "\n";
	}
	$this->mDTopen = false;
	return '</dt></dl>' . "\n";
}
1214
# Turn line-oriented wiki markup into block-level HTML.
#
# The text is processed line by line. Lines starting with any run of
# '*', '#', ':' or ';' become (possibly nested) lists, lines starting
# with a space become <pre> blocks, and the remaining lines are grouped
# into <p> paragraphs unless they contain block-level HTML themselves.
#
# $text      - text to process
# $linestart - if false, the first line is copied to the output as-is
#              and block handling starts with the second line
# returns    - the converted text
#
# Single characters are read with substr() instead of the {} offset
# syntax: {} is no longer accepted by current PHP versions, and substr()
# is also safe on the empty string produced by a blank line.
/* private */ function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Either false, or a paragraph tag that is held back until it is
	# known what the next line looks like.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match("/<\\/pre/i", $oLine );
		$preOpenMatch = preg_match("/<pre/i", $oLine );
		if (!$this->mInPre) {
			$this->mInPre = !empty($preOpenMatch);
		}
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# $pref2 is $pref with ';' mapped to ':', so that term and
			# definition lines compare as the same list below.
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( ";" == substr( $pref, -1 ) ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				# FIXME: This is not foolproof. Something better in Tokenizer might help.
				if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
					$term = $match[1];
					$output .= $term . $this->nextItem( ':' );
					$t = $match[2];
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous line that this line does not share
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( substr( $lastPrefix, $lastPrefixLength-1, 1 ) );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( substr( $pref, $commonPrefixLength-1, 1 ) );
			}
			# Open the levels this line adds
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if( preg_match( '/^(.*?(?:\s|&nbsp;)):(.*)$/', $t, $match ) ) {
						$term = $match[1];
						$output .= $term . $this->nextItem( ":" );
						$t = $match[2];
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				# closeParagraph() cleared mInPre; set it again if this
				# line opens a <pre> without closing it.
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				if ( " " == substr( $t, 0, 1 ) and trim($t) != '' ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row: emit the held-back
							# tag together with a line break
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While a paragraph tag is being held back the (blank) line
		# itself is not written out.
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close whatever list levels the last line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( substr( $pref2, $prefixLength-1, 1 ) );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1373
1374 # Return value of a magic variable (like PAGENAME)
# Look up the current value of one magic variable.
#
# $index  - one of the MAG_* identifiers handled below
# returns - the value for that variable, or NULL for any other identifier
function getVariableValue( $index ) {
	global $wgLang, $wgSitename, $wgServer;

	$value = NULL;

	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = date( 'm' );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = date('j');
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_NAMESPACE:
			# The localised namespace name is used here rather than
			# Namespace::getCanonicalName() (patch by Dori)
			$value = $wgLang->getNsText($this->mTitle->getNamespace());
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') counts from 0; getWeekdayName() is given the value plus one
			$value = $wgLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = date( 'Y' );
			break;
		case MAG_CURRENTTIME:
			$value = $wgLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = wfNumberOfArticles();
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}

	return $value;
}
1408
1409 # initialise the magic variables (like CURRENTMONTHNAME)
# Rebuild mVariables from scratch: for every identifier listed in
# $wgVariableIDs, the matching MagicWord is asked to add itself to
# mVariables together with the value from getVariableValue().
function initialiseVariables() {
	global $wgVariableIDs;

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $magicId ) {
		$word =& MagicWord::get( $magicId );
		$currentValue = $this->getVariableValue( $magicId );
		$word->addToArray( $this->mVariables, $currentValue );
	}
}
1418
# Expand {{...}} and {{{...}}} constructs in $text.
#
# For HTML output, {{variable}} and {{{argument}}} are substituted first;
# template-style {{...|...}} constructs are then handled for every output
# type. The substitutions are carried out by the callbacks
# wfVariableSubstitution, wfArgSubstitution and wfBraceSubstitution, which
# are defined outside this function.
#
# $text   - text to process
# $args   - arguments of the enclosing template call, if any
# returns - the processed text
/* private */ function replaceVariables( $text, $args = array() ) {
	global $wgLang, $wgScript, $wgArticlePath;

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	# Character classes used to build the patterns below
	$titleChars = Title::legalChars();
	$nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars );

	# This function is called recursively; the arguments of the current
	# call are kept on a stack for the duration of the call
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	if ( $this->mOutputType == OT_HTML ) {
		# Variable substitution
		$variablePattern = '/{{([' . $nonBraceChars . ']*?)}}/';
		$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

		# Argument substitution
		$argumentPattern = '/(\\n?){{{([' . $titleChars . ']*?)}}}/';
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$templatePattern = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1455
# Substitute one {{variable}} match.
#
# $matches - regex match; [0] is the whole match, [1] the variable name
# returns  - the value from mVariables if the name is known (and marks the
#            output as containing old magic), otherwise the unchanged match
function variableSubstitution( $matches ) {
	$name = $matches[1];

	if ( !array_key_exists( $name, $this->mVariables ) ) {
		return $matches[0];
	}

	$this->mOutput->mContainsOldMagic = true;
	return $this->mVariables[$name];
}
1465
# Expand one {{...}} match.
#
# The part before the first '|' is tried, in this order, as: a SUBST
# marker, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE, a built-in variable from
# mVariables, and finally a page to include (title created with
# NS_TEMPLATE as the default namespace). The first handler that succeeds
# sets $found and $text. For HTML output the resulting text is then
# either escaped (MSGNW) or run through stripParse() with the call's
# arguments.
#
# $matches - regex match: [0] whole match, [1] optional leading newline,
#            [2] the part before the first '|', [3] the remainder
#            (empty, or starting with '|')
# returns  - replacement text, or the unchanged match if nothing applied
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgLang;
	$fname = 'Parser::braceSubstitution';
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# $newline is an optional newline character before the braces
	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$newline = $matches[1];
	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |
	if ( $matches[3] !== '' ) {
		$args = explode( '|', substr( $matches[3], 1 ) );
	} else {
		$args = array();
	}
	$argc = count( $args );

	# {{{}}}
	# A match that contains a triple brace is returned untouched
	if ( strpos( $matches[0], '{{{' ) !== false ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	# NOTE(review): matchStartAndRemove() presumably strips the magic word
	# from the front of $part1 (passed by reference) -- confirm in MagicWord.
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) ) {
			if ( $this->mOutputType != OT_WIKI ) {
				# Invalid SUBST not replaced at PST time
				# Return without further processing
				$text = $matches[0];
				$found = true;
				$noparse= true;
			}
		} elseif ( $this->mOutputType == OT_WIKI ) {
			# SUBST not found in PST pass, do nothing
			$text = $matches[0];
			$found = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			# Subject to the same inclusion limit as pages, counted under 'int:<name>'
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			# A non-zero number is used as the namespace index directly;
			# anything else (including "0") is looked up as a canonical name
			if ( intval( $part1 ) ) {
				$text = $wgLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $wgLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		# $func holds the name of the Title method to call, or '' if neither matched
		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				# The first argument, if present, is passed on to the URL method
				if ( $argc > 0 ) {
					$text = $title->$func( $args[0] );
				} else {
					$text = $title->$func();
				}
				$found = true;
			}
		}
	}

	# Internal variables
	if ( !$found && array_key_exists( $part1, $this->mVariables ) ) {
		$text = $this->mVariables[$part1];
		$found = true;
		$this->mOutput->mContainsOldMagic = true;
	}
	/*
	# Arguments input from the caller
	$inputArgs = end( $this->mArgStack );
	if ( !$found && array_key_exists( $part1, $inputArgs ) ) {
	$text = $inputArgs[$part1];
	$found = true;
	}
	*/
	# Load from database
	if ( !$found ) {
		$title = Title::newFromText( $part1, NS_TEMPLATE );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;

				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[[' . $title->getPrefixedText() . ']]';
				$found = true;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( $this->mOutputType == OT_HTML && $found && !$noparse) {
		# Clean up argument array
		# Arguments without '=' are numbered from 1; "name=value"
		# arguments are stored under their trimmed name
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				if ( $value === false ) {
					$value = '';
				}
				if ( $name !== false ) {
					$assocArgs[$name] = $value;
				}
			}
		}

		# Do not enter included links in link table
		if ( !is_null( $title ) ) {
			$wgLinkCache->suspend();
		}

		# Run full parser on the included text
		$text = $this->stripParse( $text, $newline, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( !is_null( $title ) ) {
			$wgLinkCache->resume();
			$wgLinkCache->addLinkObj( $title );
		}
	}

	if ( !$found ) {
		return $matches[0];
	} else {
		return $text;
	}
}
1663
1664 # Triple brace replacement -- used for template arguments
# Substitute one {{{argument}}} match.
#
# $matches - regex match: [0] whole match, [1] optional leading newline,
#            [2] the argument name
# returns  - the parsed value of the argument if the innermost entry of
#            mArgStack defines it, otherwise the unchanged match
function argSubstitution( $matches ) {
	$argName = trim( $matches[2] );
	$callerArgs = end( $this->mArgStack );

	if ( !array_key_exists( $argName, $callerArgs ) ) {
		return $matches[0];
	}

	return $this->stripParse( $callerArgs[$argName], $matches[1], array() );
}
1677
1678 # Returns true if the function is allowed to include this entity
# Count one more inclusion of the entity identified by $dbk.
#
# $dbk    - key under which inclusions are counted
# returns - true while the count (after incrementing) does not exceed
#           MAX_INCLUDE_REPEAT, false afterwards
function incrementIncludeCount( $dbk ) {
	if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk] = 0;
	}

	$this->mIncludeCount[$dbk]++;

	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
1689
1690
1691 # Cleans up HTML, removes dangerous tags and attributes
# Filter the HTML contained in $text against a whitelist.
#
# HTML comments are removed. The text is then split on '<' and every
# piece is examined: pieces that start with a whitelisted tag are kept,
# with their attributes passed through fixTagAttributes(); every other
# '<' is written out as '&lt;'. Any '>' in the text following a tag is
# written as '&gt;'.
#
# Without $wgUseTidy, tag nesting is also checked with a stack, and tags
# still open at the end are closed. With $wgUseTidy only the whitelist
# is applied. If $wgUserHtml is off, all whitelists are empty, so every
# tag is escaped.
#
# $text   - text to clean
# returns - the cleaned text
/* private */ function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table tags are treated like single tags: they are never pushed on
	# the tag stack and need no matching close tag
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): $htmlattrs is not used again in this function --
	# presumably a leftover; confirm before removing.
	$htmlattrs = $this->getHTMLattrs () ;

	# Remove HTML comments
	# The pattern has only one capture group, so the '$2' replacement is
	# empty and the whole match is removed
	$text = preg_replace( '/(\\n *<!--.*--> *(?=\\n)|<!--.*-->)/sU', '$2', $text );

	# Everything before the first '<' is plain text; each later element
	# of $bits is the text that followed one '<'
	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if(!$wgUseTidy) {
		# $tagstack holds the open tags of the current table context;
		# $tablestack holds the saved tag stacks of enclosing contexts
		$tagstack = array(); $tablestack = array();
		foreach ( $bits as $x ) {
			# Notices and warnings are silenced here: when the pattern does
			# not match, $regs lacks the entries that list() expects
			$prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
			preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
				$x, $regs );
			list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			error_reporting( $prev );

			$badtag = 0 ;
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				# Check our stack
				if ( $slash ) {
					# Closing a tag...
					# A paired tag may only close the tag on top of the
					# stack; otherwise the popped entry is put back
					if ( ! in_array( $t, $htmlsingle ) &&
						( $ot = @array_pop( $tagstack ) ) != $t ) {
						@array_push( $tagstack, $ot );
						$badtag = 1;
					} else {
						if ( $t == 'table' ) {
							# Leaving a table: restore the enclosing tag stack
							$tagstack = array_pop( $tablestack );
						}
						$newparams = '';
					}
				} else {
					# Keep track for later
					if ( in_array( $t, $tabletags ) &&
						! in_array( 'table', $tagstack ) ) {
						$badtag = 1;
					} else if ( in_array( $t, $tagstack ) &&
						! in_array ( $t , $htmlnest ) ) {
						$badtag = 1 ;
					} else if ( ! in_array( $t, $htmlsingle ) ) {
						if ( $t == 'table' ) {
							# Entering a table: save the current stack and
							# start a fresh one for the table's content
							array_push( $tablestack, $tagstack );
							$tagstack = array();
						}
						array_push( $tagstack, $t );
					}
					# Strip non-approved attributes from the tag
					$newparams = $this->fixTagAttributes($params);

				}
				if ( ! $badtag ) {
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			# Not an accepted tag: output the piece as escaped text
			$text .= '&lt;' . str_replace( '>', '&gt;', $x);
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); }
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/',
				$x, $regs );
			@list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
			if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
				$newparams = $this->fixTagAttributes($params);
				$rest = str_replace( '>', '&gt;', $rest );
				$text .= "<$slash$t $newparams$brace$rest";
			} else {
				$text .= '&lt;' . str_replace( '>', '&gt;', $x);
			}
		}
	}
	wfProfileOut( $fname );
	return $text;
}
1806
1807
1808 /*
1809 *
1810 * This function accomplishes several tasks:
1811 * 1) Auto-number headings if that option is enabled
1812 * 2) Add an [edit] link to sections for logged in users who have enabled the option
1813 * 3) Add a Table of contents on the top for users who have enabled the option
1814 * 4) Auto-anchor headings
1815 *
1816 * It loops through all headlines, collects the necessary data, then splits up the
1817 * string and re-inserts the newly formatted headlines.
1818 *
1819 */
1820
# Post-process the <h1>..<h6> headings found in $text.
#
# Every heading gets an anchor derived from its text; depending on the
# parser options and the magic words found in the text, headings are also
# numbered, given an edit link, and listed in a table of contents that is
# inserted after the first block of text.
#
# $text   - HTML to process
# $isMain - the table of contents is only inserted when this is true
# returns - the text with the rebuilt headings (and TOC, if shown)
/* private */ function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# $matches[1] = level digit, [2] = rest of the opening tag, [3] = heading text
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC
	$mw =& MagicWord::get( MAG_FORCETOC );
	if ($mw->matchAndRemove( $text ) ) {
		$doShowToc = 1;
	}


	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	# $refers counts how often each anchor text occurred so far;
	# $refcount remembers that count per headline
	$refers = array();
	$refcount = array();
	$level = 0;
	$prevlevel = 0;
	foreach( $matches[3] as $headline ) {
		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		@$sublevelCount[$level]++;
		if( $doNumberHeadings || $doShowToc ) {
			# Build a dotted number such as "2.1" from the non-empty
			# counters of all levels up to the current one
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $sublevelCount[$i];
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		# The second call must work on the result of the first one;
		# passing $headline again would throw the first expansion away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		# Keep ':' readable and turn the remaining %XX escapes into .XX
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline);

		# count how many in assoc. array so we can track dupes in anchors
		@$refers[$canonized_headline]++;
		$refcount[$headlineCount]=$refers[$canonized_headline];

		# Prepend the number to the heading text

		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1) {
				# the two are different if the line contains a link
				$headline=$numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		# Repeated anchors get "_2", "_3", ... appended
		$anchor = $canonized_headline;
		if($refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc ) {
			$toc .= $sk->tocLine($anchor,$tocline,$toclevel);
		}
		if( $showEditLink ) {
			if ( empty( $head[$headlineCount] ) ) {
				$head[$headlineCount] = '';
			}
			$head[$headlineCount] .= $sk->editSectionLink($headlineCount+1);
		}

		# Add the edit section span
		if( $rightClickHack ) {
			$headline = $sk->editSectionScript($headlineCount+1,$headline);
		}

		# give headline the correct <h#> tag
		@$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline."</h".$level.">";

		$headlineCount++;
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines

	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) {
			# This is the [edit] link that appears for the top block of text when
			# section editing is enabled

			# Disabled because it broke block formatting
			# For example, a bullet point in the top line
			# $full .= $sk->editSectionLink(0);
		}
		$full .= $block;
		if( $doShowToc && !$i && $isMain) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		# $head[$i] is the rebuilt heading that followed block $i in the original text
		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}

	return $full;
}
2009
# Replace every "ISBN <number>" occurrence in $text with an HTML link to
# Special:Booksources.
#
# $text - text to scan.
#
# Returns the text with one link per "ISBN " marker that is followed by at
# least one digit or capital letter. Markers that are not followed by a usable
# number are emitted unchanged.
/* private */ function magicISBN( $text ) {
	# The delimiter is a literal, so explode() does the job of the old
	# regex-based split(). The space prepended here guarantees a non-empty
	# first piece even if the text starts with "ISBN "; it is removed below.
	$a = explode( 'ISBN ', " $text" );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Blanks between the marker and the number
		$blank = str_repeat( ' ', strspn( $x, ' ' ) );
		$x = (string)substr( $x, strlen( $blank ) );

		# Longest leading run of characters allowed in an ISBN
		$len = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		$num = str_replace( '-', '', $isbn );
		$num = str_replace( ' ', '', $num );

		if ( $num === '' ) {
			# Nothing usable follows the marker. $isbn can still hold hyphens
			# that were consumed above, so put them back instead of dropping them.
			$text .= "ISBN $blank$isbn$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( "isbn={$num}" ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	return $text;
}
2044
# Replace every "RFC <digits>" occurrence in $text with an external HTML link.
#
# $text - text to scan.
#
# The link target is the 'rfcurl' message with '$1' replaced by the RFC
# number; the link attributes come from the skin in $this->mOptions.
# Markers that are not followed by a digit are emitted unchanged.
#
# Returns the text with the links substituted.
/* private */ function magicRFC( $text ) {
	# The delimiter is a literal, so explode() does the job of the old
	# regex-based split(). The space prepended here guarantees a non-empty
	# first piece even if the text starts with "RFC "; it is removed below.
	$a = explode( 'RFC ', ' ' . $text );
	if ( count( $a ) < 2 ) return $text;
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789';

	foreach ( $a as $x ) {
		# Blanks between the marker and the number
		$blank = str_repeat( ' ', strspn( $x, ' ' ) );
		$x = (string)substr( $x, strlen( $blank ) );

		# Longest leading run of digits
		$len = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		if ( $rfc === '' ) {
			# No number after the marker: keep the text as it was written
			$text .= "RFC $blank$x";
		} else {
			$url = wfmsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}
	}
	return $text;
}
2077
# Entry point that produces altered wiki markup (output type OT_WIKI).
#
# $text       - wikitext to transform
# $title      - title object, stored by reference in mTitle
# $user       - user object, handed on to pstPass2()
# $options    - stored in mOptions
# $clearState - when true, clearState() is called before any processing
#
# Returns the transformed wikitext.
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOutputType = OT_WIKI;
	$this->mTitle =& $title;
	$this->mOptions = $options;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise Windows line endings
	$text = str_replace( "\r\n", "\n", $text );

	// now with regexes
	/*
	$pairs = array(
		"/<br.+(clear|break)=[\"']?(all|both)[\"']?\\/?>/i" => '<br style="clear:both;"/>',
		"/<br *?>/i" => "<br />",
	);
	$text = preg_replace(array_keys($pairs), array_values($pairs), $text);
	*/

	# Strip, run the second pass on what remains, then put the stripped
	# pieces back. The same state variable is threaded through all calls.
	$state = false;
	$stripped = $this->strip( $text, $state, false );
	$transformed = $this->pstPass2( $stripped, $user );
	$restored = $this->unstrip( $transformed, $state );
	return $this->unstripNoWiki( $restored, $state );
}
2106
# Second pass of the pre-save transform.
#
# $text - wikitext, as handed over by preSaveTransform()
# $user - user object; getName() and getOption( 'nickname' ) supply the
#         signature text
#
# In order: runs replaceVariables(), expands ~~~ / ~~~~ / ~~~~~ signatures,
# completes "pipe trick" context links, trims trailing whitespace and removes
# the MAG_END magic word.
#
# Returns the transformed wikitext.
/* private */ function pstPass2( $text, &$user ) {
	global $wgLang, $wgLocaltimezone, $wgCurParser;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) { $k = $n; }

	$useLocalTz = isset( $wgLocaltimezone );
	if ( $useLocalTz ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ=' . $wgLocaltimezone );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( $useLocalTz ) {
		# Restore the value saved above. This used to read the undefined
		# variable $oldtzs, which always reset TZ to an empty string.
		putenv( 'TZ=' . $oldtz );
	}

	# The tilde runs are literal, so plain string replacement is enough. It
	# also keeps "$1" or "\1" sequences in a name or nickname from being
	# interpreted as regex backreferences in the replacement text.
	# Longest run first, so ~~~~~ is not consumed by the shorter forms.
	$userLink = '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[($namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]]
	$p4 = "/\[\[($namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/";
	# [[ns:page (cont)|]]

	# If the current title has the form "name (context)", remember the context
	# so that [[|page]] can be expanded with it.
	$context = "";
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	/*
	$mw =& MagicWord::get( MAG_SUBST );
	$wgCurParser = $this->fork();
	$text = $mw->substituteCallback( $text, "wfBraceSubstitution" );
	$this->merge( $wgCurParser );
	*/

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2176
# Prime the members that parse() usually sets up, so that an external
# function can call individual class members with confidence.
#
# $title      - title object, stored by reference in mTitle
# $options    - stored in mOptions
# $outputType - stored in mOutputType
# $clearState - when true, clearState() is called after the assignments
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2187
# Run replaceVariables() over $text with the output type set to OT_MSG.
#
# $text    - text to transform
# $options - stored in mOptions
#
# The global $wgTitle is used as the title and clearState() is called before
# the replacement. A static flag guards against infinite recursion: a call
# made while another one is still in progress returns $text untouched.
#
# Returns the transformed text.
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	if ( !$executing ) {
		$executing = true;

		$this->mOutputType = OT_MSG;
		$this->mOptions = $options;
		$this->mTitle = $wgTitle;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}

	return $text;
}
2207
# Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
# The callback will be called with the text within the tag and should
# transform and return it.
#
# $tag      - tag name, used as the key in mTagHooks
# $callback - callback to store for that tag
#
# Returns the callback previously stored for $tag, or null if there was none.
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2216 }
2217
# Bundles a text with its language links, category links, an "old magic"
# flag and a cache time.
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	# Constructor. Every argument is optional; the cache time starts out as
	# an empty string.
	function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = "";
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators. Each one delegates to wfSetVar() and returns its result
	# (presumably the previous value -- confirm against wfSetVar).
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	# Fold another ParserOutput into this one: its language links and
	# category links are appended to ours, and the old-magic flags are OR-ed.
	# The text and the cache time are left alone.
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Append the OTHER object's category links. This used to append our own
		# language links, which lost $other's categories and polluted the list.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2251
# Settings that steer a parse: a handful of site-wide switches plus the
# preferences of one user.
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseCategoryMagic;        # Treat [[Category:xxxx]] tags specially
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# --- Getters: each returns the member of the same name ---

	function getUseTeX() {
		return $this->mUseTeX;
	}

	function getUseCategoryMagic() {
		return $this->mUseCategoryMagic;
	}

	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}

	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}

	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}

	function getSkin() {
		return $this->mSkin;
	}

	function getDateFormat() {
		return $this->mDateFormat;
	}

	function getEditSection() {
		return $this->mEditSection;
	}

	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}

	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}

	function getShowToc() {
		return $this->mShowToc;
	}

	# --- Setters: each hands the member and the new value to wfSetVar()
	# --- and returns whatever wfSetVar() returns

	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}

	function setUseCategoryMagic( $x ) {
		return wfSetVar( $this->mUseCategoryMagic, $x );
	}

	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}

	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}

	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}

	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}

	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}

	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}

	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}

	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference and nothing is returned.
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	# Build a ParserOptions object and initialise it from $user.
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	# Fill every member: the site-wide switches come from the $wg* globals,
	# the rest from the user's skin and options. A false-ish $userInput is
	# replaced by a fresh User object that is marked as loaded.
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseCategoryMagic = $wgUseCategoryMagic;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Per-user settings
		$this->mSkin =& $user->getSkin();
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
	}

}
2323
# Regex callback, used in Parser::replaceVariables.
# Forwards the match array to braceSubstitution() on the global $wgCurParser
# and returns its result.
function wfBraceSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->braceSubstitution( $matches );
	return $replacement;
}
2330
# Regex callback: forwards the match array to argSubstitution() on the
# global $wgCurParser and returns its result.
function wfArgSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->argSubstitution( $matches );
	return $replacement;
}
2336
# Regex callback: forwards the match array to variableSubstitution() on the
# global $wgCurParser and returns its result.
function wfVariableSubstitution( $matches )
{
	global $wgCurParser;

	$replacement = $wgCurParser->variableSubstitution( $matches );
	return $replacement;
}
2342
2343 ?>