2395ace96ec7eead793caa1c89a720b02bc1b3be
[lhc/web/wiklou.git] / includes / Parser.php
1 <?php
2
3 /**
4 * File for Parser and related classes
5 *
6 * @package MediaWiki
7 * @version $Id$
8 */
9
10 /**
11 * Variable substitution O(N^2) attack
12 *
13 * Without countermeasures, it would be possible to attack the parser by saving
14 * a page filled with a large number of inclusions of large pages. The size of
15 * the generated page would be proportional to the square of the input size.
16 * Hence, we limit the number of inclusions of any given page, thus bringing any
17 * attack back to O(N).
18 */
define( 'MAX_INCLUDE_REPEAT', 100 );
define( 'MAX_INCLUDE_SIZE', 1000000 ); // 1 Million

# Allowed values for $mOutputType
define( 'OT_HTML', 1 );
define( 'OT_WIKI', 2 );
define( 'OT_MSG' , 3 );

# Tag name to pass to extractTags() to make it extract
# <!-- HTML comments --> rather than an <XML>-style tag.
# This should not be anything we may want to use in wikisyntax.
define( 'STRIP_COMMENTS', 'HTMLCommentStrip' );

# Prefix of the placeholder markers, used in two functions at least
define( 'UNIQ_PREFIX', 'NaodW29');

# Constants needed for external link processing
define( 'URL_PROTOCOLS', 'http|https|ftp|irc|gopher|news|mailto' );
define( 'HTTP_PROTOCOLS', 'http|https' );
# Everything except bracket, space, or control characters
define( 'EXT_LINK_URL_CLASS', '[^]<>\\x00-\\x20\\x7F]' );
# Including space
define( 'EXT_LINK_TEXT_CLASS', '[^\]\\x00-\\x1F\\x7F]' );
define( 'EXT_IMAGE_FNAME_CLASS', '[A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]' );
define( 'EXT_IMAGE_EXTENSIONS', 'gif|png|jpg|jpeg' );
# Captures: 1 = full URL, 2 = protocol, 3 = link text
define( 'EXT_LINK_BRACKETED', '/\[(('.URL_PROTOCOLS.'):'.EXT_LINK_URL_CLASS.'+) *('.EXT_LINK_TEXT_CLASS.'*?)\]/S' );
# Captures: 1 = protocol with colon, 2 = host and path, 3 = file name, 4 = extension.
# The protocol alternatives are grouped (non-capturing) so that the colon is
# required after either of them; ungrouped, "http|https:" also accepted
# any string merely starting with "http".
define( 'EXT_IMAGE_REGEX',
	'/^((?:'.HTTP_PROTOCOLS.'):)'.  # Protocol
	'('.EXT_LINK_URL_CLASS.'+)\\/'.  # Hostname and path
	'('.EXT_IMAGE_FNAME_CLASS.'+)\\.((?i)'.EXT_IMAGE_EXTENSIONS.')$/S' # Filename
);
51
52 /**
53 * PHP Parser
54 *
55 * Processes wiki markup
56 *
57 * <pre>
58 * There are three main entry points into the Parser class:
59 * parse()
60 * produces HTML output
61 * preSaveTransform().
62 * produces altered wiki markup.
63 * transformMsg()
64 * performs brace substitution on MediaWiki messages
65 *
66 * Globals used:
67 * objects: $wgLang, $wgDateFormatter, $wgLinkCache, $wgCurParser
68 *
69 * NOT $wgArticle, $wgUser or $wgTitle. Keep them away!
70 *
71 * settings:
72 * $wgUseTex*, $wgUseDynamicDates*, $wgInterwikiMagic*,
73 * $wgNamespacesWithSubpages, $wgAllowExternalImages*,
74 * $wgLocaltimezone
75 *
76 * * only within ParserOptions
77 * </pre>
78 *
79 * @package MediaWiki
80 */
81 class Parser
82 {
83 /**#@+
84 * @access private
85 */
86 # Persistent:
87 var $mTagHooks;
88
89 # Cleared with clearState():
90 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
91 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
92
93 # Temporary:
94 var $mOptions, $mTitle, $mOutputType,
95 $mTemplates, // cache of already loaded templates, avoids
96 // multiple SQL queries for the same string
97 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
98 // in this path. Used for loop detection.
99
100 /**#@-*/
101
/**
 * Constructor
 *
 * Initialises the members that survive clearState() (template cache,
 * template path and tag hooks), then resets the per-parse state.
 *
 * @access public
 */
function Parser() {
	$this->mTagHooks = array();
	$this->mTemplatePath = array();
	$this->mTemplates = array();
	$this->clearState();
}
113
/**
 * Reset every member that holds per-parse state, including a fresh
 * ParserOutput object.
 *
 * @access private
 */
function clearState() {
	# Output object
	$this->mOutput = new ParserOutput();

	# Counters and flags
	$this->mAutonumber = 0;
	$this->mDTopen = false;
	$this->mInPre = false;
	$this->mVariables = false;
	$this->mLastSection = "";

	# Collections
	$this->mArgStack = array();
	$this->mIncludeCount = array();
	$this->mStripState = array();
}
130
/**
 * Convert wiki markup to HTML. One of the main entry points of the class.
 *
 * The text is passed through strip(), internalParse() and unstrip(),
 * then cleaned up with a set of regular expressions, run through
 * doBlockLevels() and finally unstripNoWiki(). The result is stored in
 * the ParserOutput object, which is returned.
 *
 * @param string $text wiki markup to convert
 * @param object &$title title object, kept by reference in $this->mTitle
 * @param object $options options object, kept in $this->mOptions
 * @param bool $linestart passed on to internalParse() and doBlockLevels()
 * @param bool $clearState whether to call clearState() first
 * @access public
 * @return ParserOutput a ParserOutput
 */
function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
	global $wgUseTidy;
	$fname = 'Parser::parse';
	wfProfileIn( $fname );

	if ( $clearState ) {
		$this->clearState();
	}

	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_HTML;

	$text = $this->strip( $text, $this->mStripState );
	$text = $this->internalParse( $text, $linestart );
	$text = $this->unstrip( $text, $this->mStripState );

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	if(!$wgUseTidy) {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			# (the lookahead captures nothing, so only \1 is referenced)
			'/(.) (?=\\?|:|;|!|\\302\\273)/i' => '\\1&nbsp;',
			# french spaces, Guillemet-right
			"/(\\302\\253) /i"=>"\\1&nbsp;",
			'/<hr *>/i' => '<hr />',
			'/<br *>/i' => '<br />',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>',
			# Clean up spare ampersands: any & that does not start a hex,
			# decimal or named character reference. Note that we probably
			# ought to be more careful about named entities.
			'/&(?!#[Xx][0-9A-Fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&amp;'
		);
	} else {
		$fixtags = array(
			# french spaces, last one Guillemet-left
			'/ (\\?|:|;|!|\\302\\273)/i' => '&nbsp;\\1',
			# french spaces, Guillemet-right
			'/(\\302\\253) /i' => '\\1&nbsp;',
			'/<center *>/i' => '<div class="center">',
			'/<\\/center *>/i' => '</div>'
		);
	}
	$text = preg_replace( array_keys($fixtags), array_values($fixtags), $text );

	# only once and last
	$text = $this->doBlockLevels( $text, $linestart );
	$text = $this->unstripNoWiki( $text, $this->mStripState );
	$this->mOutput->setText( $text );
	wfProfileOut( $fname );
	return $this->mOutput;
}
190
/**
 * Build a random string out of two mt_rand() values, each written
 * in hexadecimal.
 *
 * @access private
 * @static
 * @return string
 */
function getRandomString() {
	$first = dechex( mt_rand( 0, 0x7fffffff ) );
	$second = dechex( mt_rand( 0, 0x7fffffff ) );
	return $first . $second;
}
200
/**
 * Replaces all occurrences of <$tag>content</$tag> in the text
 * with a random marker and returns the new text. The output parameter
 * $content will be an associative array filled with data on the form
 * $unique_marker => content.
 *
 * If $content is already set, the additional entries will be appended.
 * If $tag is set to STRIP_COMMENTS, the function will extract
 * <!-- HTML comments --> instead.
 *
 * An opening tag without a matching closing tag takes everything up to
 * the end of the text as its content.
 *
 * @param string $tag tag name (matched literally, case-insensitively)
 * @param string $text text to process
 * @param array &$content receives the marker => content pairs
 * @param string $uniq_prefix prefix of the generated markers
 * @return string the text with each tag replaced by its marker
 * @access private
 * @static
 */
function extractTags($tag, $text, &$content, $uniq_prefix = ''){
	$rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString();
	if ( !$content ) {
		$content = array( );
	}

	# The delimiters do not change while scanning, so build them once.
	if ( $tag == STRIP_COMMENTS ) {
		$openPattern = '/<!--/';
		$closePattern = '/-->/';
	} else {
		# Quote the name so that regex metacharacters in it are literal
		$quotedTag = preg_quote( $tag, '/' );
		$openPattern = "/<\\s*{$quotedTag}\\s*>/i";
		$closePattern = "/<\\/\\s*{$quotedTag}\\s*>/i";
	}

	$n = 1;
	$stripped = '';

	while ( '' != $text ) {
		$p = preg_split( $openPattern, $text, 2 );
		$stripped .= $p[0];
		if ( ( count( $p ) < 2 ) || ( '' == $p[1] ) ) {
			# No further opening tag, or nothing follows it
			$text = '';
		} else {
			$q = preg_split( $closePattern, $p[1], 2 );
			$marker = $rnd . sprintf('%08X', $n++);
			$content[$marker] = $q[0];
			$stripped .= $marker;
			# $q[1] is absent when the closing tag is missing
			$text = isset( $q[1] ) ? $q[1] : '';
		}
	}
	return $stripped;
}
245
/**
 * Strips html (if enabled), nowiki, math, pre, optionally HTML comments,
 * and the tags registered in $this->mTagHooks, replacing each with a
 * unique marker.
 *
 * When the output type is OT_HTML the stripped content is rendered;
 * otherwise it is stored wrapped in its original tags.
 * Returns the text, and fills an array with data needed in unstrip().
 * If $state is already a valid strip state, the new data is added to it.
 *
 * @param string $text text to process
 * @param array &$state strip state: content type => ( marker => content )
 * @param bool $stripcomments when set, HTML comments <!-- like this -->
 *  will be stripped in addition to other tags. This is important
 *  for section editing, where these comments cause confusion when
 *  counting the sections in the wikisource
 * @return string the text with markers in place of the stripped parts
 *
 * @access private
 */
function strip( $text, &$state, $stripcomments = false ) {
	$render = ($this->mOutputType == OT_HTML);
	$html_content = array();
	$nowiki_content = array();
	$math_content = array();
	$pre_content = array();
	$comment_content = array();
	$ext_content = array();

	# Escaping of text that already looks like a placeholder is disabled:
	$uniq_prefix = UNIQ_PREFIX;
	#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );

	# html
	global $wgRawHtml, $wgWhitelistEdit;
	if( $wgRawHtml && $wgWhitelistEdit ) {
		$text = Parser::extractTags('html', $text, $html_content, $uniq_prefix);
		foreach( $html_content as $marker => $content ) {
			if ($render ) {
				# Raw and unchecked for validity.
				$html_content[$marker] = $content;
			} else {
				$html_content[$marker] = '<html>'.$content.'</html>';
			}
		}
	}

	# nowiki
	$text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix);
	foreach( $nowiki_content as $marker => $content ) {
		if( $render ){
			$nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content );
		} else {
			$nowiki_content[$marker] = '<nowiki>'.$content.'</nowiki>';
		}
	}

	# math
	$text = Parser::extractTags('math', $text, $math_content, $uniq_prefix);
	foreach( $math_content as $marker => $content ){
		if( $render ) {
			if( $this->mOptions->getUseTeX() ) {
				$math_content[$marker] = renderMath( $content );
			} else {
				# Show the source instead. The content is put back after
				# the HTML clean-up has run, so it is escaped here in
				# the same way as nowiki and pre content.
				$math_content[$marker] = '&lt;math&gt;' .
					wfEscapeHTMLTagsOnly( $content ) . '&lt;/math&gt;';
			}
		} else {
			$math_content[$marker] = '<math>'.$content.'</math>';
		}
	}

	# pre
	$text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix);
	foreach( $pre_content as $marker => $content ){
		if( $render ){
			$pre_content[$marker] = '<pre>' . wfEscapeHTMLTagsOnly( $content ) . '</pre>';
		} else {
			$pre_content[$marker] = '<pre>'.$content.'</pre>';
		}
	}

	# Comments
	if($stripcomments) {
		$text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix);
		foreach( $comment_content as $marker => $content ){
			$comment_content[$marker] = '<!--'.$content.'-->';
		}
	}

	# Extensions
	foreach ( $this->mTagHooks as $tag => $callback ) {
		$ext_content[$tag] = array();
		$text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix );
		foreach( $ext_content[$tag] as $marker => $content ) {
			if ( $render ) {
				$ext_content[$tag][$marker] = $callback( $content );
			} else {
				$ext_content[$tag][$marker] = "<$tag>$content</$tag>";
			}
		}
	}

	# Merge with the pre-existing state, if there is one. A content type
	# that the state does not know yet is added rather than dropped, so
	# that its markers can still be replaced by unstrip().
	if ( !$state ) {
		$state = array();
	}
	$builtin_content = array(
		'html' => $html_content,
		'nowiki' => $nowiki_content,
		'math' => $math_content,
		'pre' => $pre_content,
		'comment' => $comment_content,
	);
	foreach ( array( $builtin_content, $ext_content ) as $group ) {
		foreach ( $group as $type => $items ) {
			if ( isset( $state[$type] ) && is_array( $state[$type] ) ) {
				$state[$type] = $state[$type] + $items;
			} else {
				$state[$type] = $items;
			}
		}
	}
	return $text;
}
365
/**
 * Restores everything removed by strip() except the 'nowiki' and 'html'
 * sections, which are left for unstripNoWiki().
 *
 * always call unstripNoWiki() after this one
 *
 * @param string $text text containing strip markers
 * @param array &$state strip state: content type => ( marker => content )
 * @return string
 * @access private
 */
function unstrip( $text, &$state ) {
	if ( !is_array( $state ) ) {
		return $text;
	}

	# Must expand in reverse order, otherwise nested tags will be corrupted.
	# Walking reversed copies visits every entry, whatever its value is.
	foreach ( array_reverse( $state, true ) as $type => $contentDict ) {
		if ( $type === 'nowiki' || $type === 'html' || !is_array( $contentDict ) ) {
			continue;
		}
		foreach ( array_reverse( $contentDict, true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
385
/**
 * Restores the 'nowiki' sections and, when $wgRawHtml is set, the
 * 'html' sections of the strip state.
 *
 * always call this after unstrip() to preserve the order
 *
 * @param string $text text containing strip markers
 * @param array &$state strip state: content type => ( marker => content )
 * @return string
 * @access private
 */
function unstripNoWiki( $text, &$state ) {
	global $wgRawHtml;

	$types = array( 'nowiki' );
	if ( $wgRawHtml ) {
		$types[] = 'html';
	}

	foreach ( $types as $type ) {
		# A state that never held this type has nothing to restore
		if ( !isset( $state[$type] ) || !is_array( $state[$type] ) ) {
			continue;
		}
		# Must expand in reverse order, otherwise nested tags will be corrupted
		foreach ( array_reverse( $state[$type], true ) as $marker => $content ) {
			$text = str_replace( $marker, $content, $text );
		}
	}

	return $text;
}
406
/**
 * Add an item to the strip state
 * Returns the unique tag which must be inserted into the stripped text
 * The tag will be replaced with the original text in unstrip()
 *
 * @param string $text the text to store
 * @param array &$state strip state; created if it is empty
 * @return string the marker standing for $text
 * @access private
 */
function insertStripItem( $text, &$state ) {
	$rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString();
	if ( !$state ) {
		# Same content types as a state created by strip(), plus 'item'
		$state = array(
			'html' => array(),
			'nowiki' => array(),
			'math' => array(),
			'pre' => array(),
			'comment' => array(),
			'item' => array()
		);
	}
	$state['item'][$rnd] = $text;
	return $rnd;
}
427
/**
 * Return the list of attribute names that are allowed on HTML tags
 * (no scripting, etc.)
 *
 * @return array list of lower-case attribute names
 * @access private
 */
function getHTMLattrs () {
	return array(
		'title', 'align', 'lang', 'dir', 'width', 'height', 'bgcolor',
		# BR
		'clear',
		# HR
		'noshade',
		# BLOCKQUOTE, Q
		'cite',
		# FONT
		'size', 'face', 'color',
		# For various lists, mostly deprecated but safe
		'type', 'start', 'value', 'compact',
		# Tables
		'summary', 'width', 'border', 'frame', 'rules',
		'cellspacing', 'cellpadding', 'valign', 'char',
		'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis',
		'headers', 'scope', 'rowspan', 'colspan',
		# For CSS
		'id', 'class', 'name', 'style'
	);
}
448
/**
 * Remove non approved attributes and javascript in css
 *
 * Attributes whose name is not returned by getHTMLattrs() are removed.
 * If the remaining text has a style attribute containing "expression",
 * "url(" or something resembling a URL scheme, all attributes are dropped.
 *
 * @param string $t the attribute part of a tag
 * @return string the cleaned attribute text, trimmed
 * @access private
 */
function fixTagAttributes ( $t ) {
	if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-)

	# Strip non-approved attributes from the tag. A callback is used
	# instead of the /e modifier so that the attribute text is never
	# evaluated as PHP code.
	$t = preg_replace_callback(
		'/(\\w+)(\\s*=\\s*([^\\s">]+|"[^">]*"))?/',
		array( $this, 'fixTagAttributesCallback' ),
		$t );

	$t = str_replace ( '<></>' , '' , $t ) ; # This should fix bug 980557

	# Strip javascript "expression" from stylesheets. Brute force approach:
	# If anything offensive is found, all attributes of the HTML tag are dropped
	if( preg_match(
		'/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is',
		wfMungeToUtf8( $t ) ) )
	{
		$t='';
	}

	return trim ( $t ) ;
}

/**
 * Callback for fixTagAttributes(): decide what to keep of one
 * name or name=value match.
 *
 * @param array $matches 1 = attribute name, 3 = value (if there is one)
 * @return string name, or name=value, for an allowed attribute; '' otherwise
 * @access private
 */
function fixTagAttributesCallback( $matches ) {
	if ( !in_array( strtolower( $matches[1] ), $this->getHTMLattrs() ) ) {
		return '';
	}
	# The value group is absent from the match when no value was given
	$value = isset( $matches[3] ) ? $matches[3] : '';
	return ( $value !== '' ) ? $matches[1] . '=' . $value : $matches[1];
}
478
/**
 * Interface with html tidy, used if $wgUseTidy = true
 *
 * The text is wrapped in a minimal XHTML document and piped through the
 * program named by $wgTidyBin; whatever that program writes to its
 * standard output is returned. If nothing comes back for a non-empty
 * text, the original text is returned with an HTML comment appended.
 *
 * @param string $text
 * @return string
 * @access public
 * @static
 */
function tidy ( $text ) {
	global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
	global $wgInputEncoding, $wgOutputEncoding;
	$fname = 'Parser::tidy';
	wfProfileIn( $fname );

	# Encoding switch: a specific one only if input and output agree
	$opts = ' -raw';
	if ( $wgInputEncoding == $wgOutputEncoding ) {
		$encoding = strtoupper( $wgOutputEncoding );
		if ( $encoding == 'ISO-8859-1' ) {
			$opts = ' -latin1';
		} else if ( $encoding == 'UTF-8' ) {
			$opts = ' -utf8';
		}
	}

	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
		'<head><title>test</title></head><body>'.$text.'</body></html>';

	# stdin and stdout are pipes, stderr is discarded
	$descriptorspec = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);

	$cleansource = '';
	$process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes);
	if ( is_resource( $process ) ) {
		fwrite( $pipes[0], $wrappedtext );
		fclose( $pipes[0] );
		while ( !feof( $pipes[1] ) ) {
			$cleansource .= fgets( $pipes[1], 1024 );
		}
		fclose( $pipes[1] );
		proc_close( $process );
	}

	wfProfileOut( $fname );

	if ( $cleansource != '' || $text == '' ) {
		return $cleansource;
	}
	wfDebug( "Tidy error detected!\n" );
	return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
}
532
/**
 * Parse the wiki syntax used to render tables
 *
 * The text is handled line by line. Recognised line starts are
 * {| (optionally preceded by colons, which indent the table),
 * |} , |- , | , ! and |+ ; anything else is left alone. Tables still
 * open at the end of the text are closed.
 *
 * @param string $t text to process
 * @return string
 * @access private
 */
function doTableStuff ( $t ) {
	$fname = 'Parser::doTableStuff';
	wfProfileIn( $fname );

	$t = explode ( "\n" , $t ) ;
	# One entry per currently open table on each of these stacks:
	$td = array () ; # Is currently a td tag open?
	$ltd = array () ; # Was it TD or TH?
	$tr = array () ; # Is currently a tr tag open?
	$ltr = array () ; # tr attributes
	$indent_level = 0; # indent level of the table
	foreach ( $t AS $k => $x )
	{
		$x = trim ( $x ) ;
		$fc = substr ( $x , 0 , 1 ) ;
		if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
			# Start of a table
			$indent_level = strlen( $matches[1] );
			$t[$k] = "\n" .
				str_repeat( '<dl><dd>', $indent_level ) .
				'<table ' . $this->fixTagAttributes ( $matches[2] ) . '>' ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $tr , false ) ;
			array_push ( $ltr , '' ) ;
		}
		else if ( count ( $td ) == 0 ) { } # Don't do any of the following
		else if ( '|}' == substr ( $x , 0 , 2 ) ) {
			# End of a table: close the open cell and row as well
			$z = "</table>\n" ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z . str_repeat( '</dd></dl>', $indent_level );
		}
		else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
			# New row: what follows the dashes is the row's attributes.
			# The <tr> itself is written when the first cell arrives.
			$x = ltrim ( substr ( $x , 1 ) , '-' ) ;
			$z = '' ;
			$l = array_pop ( $ltd ) ;
			if ( array_pop ( $tr ) ) $z = '</tr>' . $z ;
			if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
			array_pop ( $ltr ) ;
			$t[$k] = $z ;
			array_push ( $tr , false ) ;
			array_push ( $td , false ) ;
			array_push ( $ltd , '' ) ;
			array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ;
		}
		else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
			# $x is a table row
			if ( '|+' == substr ( $x , 0 , 2 ) ) {
				$fc = '+' ;
				$x = substr ( $x , 1 ) ;
			}
			$after = substr ( $x , 1 ) ;
			if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
			$after = explode ( '||' , $after ) ;
			$t[$k] = '' ;

			# Loop through each table cell
			foreach ( $after AS $theline )
			{
				$z = '' ;
				if ( $fc != '+' )
				{
					# Open the row if that has not happened yet
					$tra = array_pop ( $ltr ) ;
					if ( !array_pop ( $tr ) ) $z = '<tr '.$tra.">\n" ;
					array_push ( $tr , true ) ;
					array_push ( $ltr , '' ) ;
				}

				# Close the previous cell, then note the type of the new one
				$l = array_pop ( $ltd ) ;
				if ( array_pop ( $td ) ) $z = '</'.$l.'>' . $z ;
				if ( $fc == '|' ) $l = 'td' ;
				else if ( $fc == '!' ) $l = 'th' ;
				else if ( $fc == '+' ) $l = 'caption' ;
				else $l = '' ;
				array_push ( $ltd , $l ) ;

				# Cell parameters
				$y = explode ( '|' , $theline , 2 ) ;
				# Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $y[0], '[[' ) !== false ) {
					$y = array ($theline);
				}
				if ( count ( $y ) == 1 )
					$y = "{$z}<{$l}>{$y[0]}" ;
				else $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ;
				$t[$k] .= $y ;
				array_push ( $td , true ) ;
			}
		}
	}

	# Closing open td, tr && table
	while ( count ( $td ) > 0 )
	{
		# The open cell may be a td, th or caption; close the right one
		$l = array_pop ( $ltd ) ;
		if ( array_pop ( $td ) ) $t[] = '</'.$l.'>' ;
		if ( array_pop ( $tr ) ) $t[] = '</tr>' ;
		$t[] = '</table>' ;
	}

	$t = implode ( "\n" , $t ) ;
	# $t = $this->removeHTMLtags( $t );
	wfProfileOut( $fname );
	return $t ;
}
645
/**
 * Helper function for parse() that transforms wiki markup into
 * HTML. Only called for $mOutputType == OT_HTML.
 *
 * Runs the individual transformation passes one after another; each
 * pass works on the result of the previous one.
 *
 * @param string $text
 * @param bool $linestart not used in this method
 * @param array $args passed to replaceVariables()
 * @param bool $isMain passed to formatHeadings()
 * @return string
 * @access private
 */
function internalParse( $text, $linestart, $args = array(), $isMain=true ) {
	global $wgContLang;

	$fname = 'Parser::internalParse';
	wfProfileIn( $fname );

	# Tags, variables and language conversion
	$text = $this->removeHTMLtags( $text );
	$text = $this->replaceVariables( $text, $args );
	$text = $wgContLang->convert( $text );

	# Horizontal rules and headings
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
	$text = $this->doHeadings( $text );

	if ( $this->mOptions->getUseDynamicDates() ) {
		global $wgDateFormatter;
		$dateFormat = $this->mOptions->getDateFormat();
		$text = $wgDateFormatter->reformat( $dateFormat, $text );
	}

	$text = $this->doAllQuotes( $text );

	# Links. The internal ones are done twice, to replace links and
	# images inside captions of images.
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->replaceExternalLinks( $text );
	$text = $this->doMagicLinks( $text );

	$text = $this->doTableStuff( $text );
	$text = $this->formatHeadings( $text, $isMain );

	$skin =& $this->mOptions->getSkin();
	$text = $skin->transformContent( $text );

	wfProfileOut( $fname );
	return $text;
}
684
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links. The GEO pass only runs if $wgUseGeoMode
 * is set and true.
 *
 * @param string &$text modified in place and also returned
 * @return string
 * @access private
 */
function &doMagicLinks( &$text ) {
	global $wgUseGeoMode;

	$text = $this->magicISBN( $text );
	if ( !empty( $wgUseGeoMode ) ) {
		$text = $this->magicGEO( $text );
	}
	$text = $this->magicRFC( $text );

	return $text;
}
700
/**
 * Parse ^^ tokens and return html
 *
 * Each ^^text^^ pair becomes <small><sup>text</sup></small>. The match
 * is non-greedy, so several pairs on one line are converted one by one.
 *
 * @param string $text
 * @return string
 * @access private
 */
function doExponent ( $text ) {
	$fname = 'Parser::doExponent';
	wfProfileIn( $fname);
	$text = preg_replace('/\^\^(.*?)\^\^/','<small><sup>\\1</sup></small>', $text);
	wfProfileOut( $fname);
	return $text;
}
713
/**
 * Parse headers and return html
 *
 * A line that starts with one to six '=' signs and has the same run of
 * '=' signs before whitespace or its end becomes an <h1>..<h6> element.
 * The longest runs are handled first.
 *
 * @param string $text
 * @return string
 * @access private
 */
function doHeadings( $text ) {
	$fname = 'Parser::doHeadings';
	wfProfileIn( $fname );
	for ( $level = 6; $level > 0; $level-- ) {
		$marks = str_repeat( '=', $level );
		$pattern = "/^{$marks}(.+){$marks}(\\s|$)/m";
		$replacement = "<h{$level}>\\1</h{$level}>\\2";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	wfProfileOut( $fname );
	return $text;
}
730
/**
 * Replace single quotes with HTML markup, by running doQuotes()
 * on every line of the text.
 *
 * @param string $text
 * @access private
 * @return string the altered text
 */
function doAllQuotes( $text ) {
	$fname = 'Parser::doAllQuotes';
	wfProfileIn( $fname );
	$converted = array();
	foreach ( explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	$outtext = implode( "\n", $converted );
	wfProfileOut( $fname );
	return $outtext;
}
748
/**
 * Helper function for doAllQuotes()
 *
 * Converts the apostrophe markup of one line into HTML: two apostrophes
 * toggle <i>, three toggle <b>, five toggle both. Tags that are still
 * open at the end of the line are closed.
 *
 * @param string $text a single line
 * @return string
 * @access private
 */
function doQuotes( $text ) {
	# Odd entries are runs of two or more apostrophes, even entries are
	# the text around them.
	$parts = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$total = count( $parts );
	if ( $total == 1 ) {
		return $text;
	}

	# Pass 1: reduce every run to 2, 3 or 5 apostrophes, moving the
	# surplus into the preceding text, and count the mark-ups. A run of
	# five counts as both bold and italics.
	$numBold = 0;
	$numItalics = 0;
	for ( $idx = 1; $idx < $total; $idx += 2 ) {
		$len = strlen( $parts[$idx] );
		if ( $len == 4 ) {
			# The first apostrophe is text, the other three are bold
			$parts[$idx - 1] .= "'";
			$parts[$idx] = "'''";
			$len = 3;
		} else if ( $len > 5 ) {
			# Everything except the last five is text
			$parts[$idx - 1] .= str_repeat( "'", $len - 5 );
			$parts[$idx] = "'''''";
			$len = 5;
		}
		switch ( $len ) {
			case 2:
				$numItalics++;
				break;
			case 3:
				$numBold++;
				break;
			case 5:
				$numItalics++;
				$numBold++;
				break;
		}
	}

	# Pass 2: with an odd number of both bold and italics, one of the
	# bold mark-ups was probably meant as an apostrophe followed by
	# italics. Which one we cannot know for certain; preference goes to
	# the first one after a single-letter word, then to the first one
	# after a multi-letter word, then to the first one after a space.
	if ( ( $numBold % 2 == 1 ) && ( $numItalics % 2 == 1 ) ) {
		$afterSingleLetter = -1;
		$afterMultiLetter = -1;
		$afterSpace = -1;
		for ( $idx = 1; $idx < $total; $idx += 2 ) {
			if ( strlen( $parts[$idx] ) != 3 ) {
				continue;
			}
			$last = substr( $parts[$idx - 1], -1 );
			$secondLast = substr( $parts[$idx - 1], -2, 1 );
			if ( $last == ' ' ) {
				if ( $afterSpace == -1 ) $afterSpace = $idx;
			} else if ( $secondLast == ' ' ) {
				if ( $afterSingleLetter == -1 ) $afterSingleLetter = $idx;
			} else {
				if ( $afterMultiLetter == -1 ) $afterMultiLetter = $idx;
			}
		}

		# All three can be -1, for example if the only odd mark-up
		# in the line is a run of five apostrophes.
		$chosen = -1;
		if ( $afterSingleLetter > -1 ) {
			$chosen = $afterSingleLetter;
		} else if ( $afterMultiLetter > -1 ) {
			$chosen = $afterMultiLetter;
		} else if ( $afterSpace > -1 ) {
			$chosen = $afterSpace;
		}
		if ( $chosen > -1 ) {
			$parts[$chosen] = "''";
			$parts[$chosen - 1] .= "'";
		}
	}

	# Pass 3: write the HTML.
	# $state names the open tags in the order they were opened ('', 'b',
	# 'i', 'bi', 'ib'), or is 'both' after a run of five apostrophes for
	# which the nesting order is not decided yet; the text following
	# such a run is collected in $buffer.
	# run length => ( state => ( markup to write, next state ) )
	$transitions = array(
		2 => array(
			''   => array( '<i>', 'i' ),
			'b'  => array( '<i>', 'bi' ),
			'i'  => array( '</i>', '' ),
			'bi' => array( '</i>', 'b' ),
			'ib' => array( '</b></i><b>', 'b' )
		),
		3 => array(
			''   => array( '<b>', 'b' ),
			'i'  => array( '<b>', 'ib' ),
			'b'  => array( '</b>', '' ),
			'bi' => array( '</i></b><i>', 'i' ),
			'ib' => array( '</b>', 'i' )
		),
		5 => array(
			''   => array( '', 'both' ),
			'b'  => array( '</b><i>', 'i' ),
			'i'  => array( '</i><b>', 'b' ),
			'bi' => array( '</i></b>', '' ),
			'ib' => array( '</b></i>', '' )
		)
	);
	# run length => ( markup before the buffer, markup after it, next state )
	$bufferWrap = array(
		2 => array( '<b><i>', '</i>', 'b' ),
		3 => array( '<i><b>', '</b>', 'i' ),
		5 => array( '<i><b>', '</b></i>', '' )
	);

	$output = '';
	$buffer = '';
	$state = '';
	for ( $idx = 0; $idx < $total; $idx++ ) {
		$piece = $parts[$idx];
		if ( $idx % 2 == 0 ) {
			# Plain text
			if ( $state == 'both' ) {
				$buffer .= $piece;
			} else {
				$output .= $piece;
			}
			continue;
		}

		$len = strlen( $piece );
		if ( $state == 'both' ) {
			if ( isset( $bufferWrap[$len] ) ) {
				$wrap = $bufferWrap[$len];
				$output .= $wrap[0] . $buffer . $wrap[1];
				$state = $wrap[2];
			}
		} else if ( isset( $transitions[$len][$state] ) ) {
			$step = $transitions[$len][$state];
			$output .= $step[0];
			$state = $step[1];
			if ( $state == 'both' ) {
				$buffer = '';
			}
		}
	}

	# Now close all remaining tags. Notice that the order is important.
	switch ( $state ) {
		case 'b':
			$output .= '</b>';
			break;
		case 'i':
			$output .= '</i>';
			break;
		case 'ib':
			$output .= '</b></i>';
			break;
		case 'bi':
			$output .= '</i></b>';
			break;
		case 'both':
			$output .= '<b><i>' . $buffer . '</i></b>';
			break;
	}
	return $output;
}
913
/**
 * Replace external links
 *
 * Bracketed links ([url] and [url text]) are turned into <a> elements
 * here; the text between them is passed to replaceFreeExternalLinks().
 *
 * Note: great care has to be taken in the order of things here, so
 * that we don't end up interpreting some URLs twice.
 *
 * @param string $text
 * @return string
 * @access private
 */
function replaceExternalLinks( $text ) {
	$fname = 'Parser::replaceExternalLinks';
	wfProfileIn( $fname );

	$sk =& $this->mOptions->getSkin();
	$linktrail = wfMsgForContent('linktrail');

	# $bits holds the text before the first link, followed by four
	# entries per link: URL, protocol, link text, text after the link
	$bits = preg_split( EXT_LINK_BRACKETED, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

	$s = $this->replaceFreeExternalLinks( array_shift( $bits ) );

	$i = 0;
	while ( $i<count( $bits ) ) {
		$url = $bits[$i++];
		$protocol = $bits[$i++];
		$text = $bits[$i++];
		$trail = $bits[$i++];

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeImageLink( $text );
		if ( $img !== false ) {
			$text = $img;
		}

		$dtrail = '';

		# No link text, e.g. [http://domain.tld/some.link]
		if ( $text == '' ) {
			# Autonumber if allowed
			if ( strpos( HTTP_PROTOCOLS, $protocol ) !== false ) {
				$text = '[' . ++$this->mAutonumber . ']';
			} else {
				# Otherwise just use the URL
				$text = htmlspecialchars( $url );
			}
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			if ( preg_match( $linktrail, $trail, $m2 ) ) {
				$dtrail = $m2[1];
				$trail = $m2[2];
			}
		}

		# The URL as it is written to the page. It may contain quotes and
		# ampersands, so it has to be escaped; &amp; from obsolete syntax
		# is reduced to & first so that it is not escaped twice.
		$encUrl = htmlspecialchars( str_replace( '&amp;', '&', $url ) );

		# Bit in parentheses showing the URL for the printable version
		if( $url == $text || preg_match( "!$protocol://" . preg_quote( $text, '/' ) . "/?$!", $url ) ) {
			$paren = '';
		} else {
			# Expand the URL for printable version
			if ( ! $sk->suppressUrlExpansion() ) {
				$paren = "<span class='urlexpansion'> (<i>" . $encUrl . "</i>)</span>";
			} else {
				$paren = '';
			}
		}

		# Process the trail (i.e. everything after this link up until start of the next link),
		# replacing any non-bracketed links
		$trail = $this->replaceFreeExternalLinks( $trail );

		$la = $sk->getExternalLinkAttributes( $url, $text );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like &ouml; aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= "<a href=\"{$encUrl}\"{$la}>{$text}</a>{$dtrail}{$paren}{$trail}";
	}

	wfProfileOut( $fname );
	return $s;
}
996
/**
 * Replace anything that looks like a URL with a link, or with an
 * image where maybeMakeImageLink() allows it.
 *
 * @param string $text
 * @return string
 * @access private
 */
function replaceFreeExternalLinks( $text ) {
	# After the leading text, the pieces come in pairs:
	# a protocol with its colon, then whatever follows it
	$pieces = preg_split( '/((?:'.URL_PROTOCOLS.'):)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$result = array_shift( $pieces );

	$sk =& $this->mOptions->getSkin();

	for ( $pos = 0; $pos < count( $pieces ); $pos += 2 ) {
		$protocol = $pieces[$pos];
		$remainder = $pieces[$pos + 1];

		if ( !preg_match( '/^('.EXT_LINK_URL_CLASS.'+)(.*)$/s', $remainder, $m ) ) {
			# Nothing usable after the protocol, leave the text as it is
			$result .= $protocol . $remainder;
			continue;
		}

		# Found some characters after the protocol that look promising
		$url = $protocol . $m[1];
		$trail = $m[2];

		# Move trailing punctuation to $trail
		$sep = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}
		$numSepChars = strspn( strrev( $url ), $sep );
		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		# Replace &amp; from obsolete syntax with &
		$url = str_replace( '&amp;', '&', $url );

		# Is this an external image? If not, make a link
		$link = $this->maybeMakeImageLink( $url );
		if ( $link === false ) {
			$link = $sk->makeExternalLink( $url, $url );
		}
		$result .= $link . $trail;
	}
	return $result;
}
1046
/**
 * Build an inline image for a URL when external images are enabled in the
 * parser options and the URL matches EXT_IMAGE_REGEX.
 *
 * @param string $url candidate image URL
 * @return mixed image HTML from the skin, or false if no image was made
 * @access private
 */
function maybeMakeImageLink( $url ) {
	$sk =& $this->mOptions->getSkin();

	if ( !$this->mOptions->getAllowExternalImages() ) {
		return false;
	}
	if ( !preg_match( EXT_IMAGE_REGEX, $url ) ) {
		return false;
	}
	# Image found
	return $sk->makeImage( htmlspecialchars( $url ) );
}
1062
/**
 * Process [[ ]] wikilinks.
 *
 * The text is split on '[[' and each following segment is matched against
 * the link pattern. Depending on the target, a segment becomes an
 * interlanguage link, an image, a category registration, a self-link, a
 * media/special link or a normal page link. Segments that do not form a
 * valid link are written back unchanged.
 *
 * @param string $s wikitext to process
 * @return string text with internal links replaced
 * @access private
 */
function replaceInternalLinks( $s ) {
	global $wgContLang, $wgLinkCache;
	static $fname = 'Parser::replaceInternalLinks' ;
	# Patterns are built on first use and kept for later calls
	static $titleChars = FALSE;
	static $linkRegex = FALSE;
	# Match the end of a line for a word that's not followed by whitespace,
	# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
	static $prefixRegex = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD';

	wfProfileIn( $fname );
	wfProfileIn( $fname.'-setup' );

	# the % is needed to support urlencoded titles as well
	if ( !$titleChars ) {
		$titleChars = Title::legalChars() . '#%';
	}
	$sk =& $this->mOptions->getSkin();

	# Result is not used below; the lookup is retained as in the original
	$redirect = MagicWord::get ( MAG_REDIRECT ) ;

	# A space is prepended so the first piece is never empty, then removed
	$segments = explode( '[[', ' ' . $s );
	$out = array_shift( $segments );
	$out = substr( $out, 1 );

	# Match a link having the form [[namespace:link|alternate]]trail
	if ( !$linkRegex ) {
		$linkRegex = '/^([' . $titleChars . ']+)(?:\\|([^]]+))?]](.*)$/sD';
	}

	$useLinkPrefixExtension = $wgContLang->linkPrefixExtension();
	# Special and Media are pseudo-namespaces; no pages actually exist in them

	$nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() );

	if ( !$useLinkPrefixExtension ) {
		$prefix = '';
	} elseif ( preg_match( $prefixRegex, $out, $pm ) ) {
		$first_prefix = $pm[2];
		$out = $pm[1];
	} else {
		$first_prefix = false;
	}

	wfProfileOut( $fname.'-setup' );

	# Walk over every piece that followed a '[['
	foreach ( $segments as $segment ) {
		wfProfileIn( $fname.'-prefixhandling' );
		if ( $useLinkPrefixExtension ) {
			# Pull a trailing word off the output so far; it becomes the link prefix
			if ( preg_match( $prefixRegex, $out, $pm ) ) {
				$prefix = $pm[2];
				$out = $pm[1];
			} else {
				$prefix = '';
			}
			# The prefix found during setup applies to the first link only
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}
		wfProfileOut( $fname.'-prefixhandling' );

		if ( !preg_match( $linkRegex, $segment, $lm ) ) {
			# Invalid form; output directly
			$out .= $prefix . '[[' . $segment ;
			continue;
		}
		# page with normal text or alt
		$text = $lm[2];
		$target = $lm[1];
		# fix up urlencoded title texts
		if ( preg_match( '/%/', $target ) ) {
			$target = urldecode( $target );
		}
		$trail = $lm[3];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/((?:'.URL_PROTOCOLS.'):)/', $target ) ) {
			$out .= $prefix . '[[' . $segment ;
			continue;
		}

		# Make subpage if necessary
		$link = $this->maybeDoSubpageLink( $target, $text );

		# A leading ':' forces a plain link to an image/category/language page
		$noforce = ( substr( $target, 0, 1 ) != ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$wasblank = ( '' == $text );
		if ( $wasblank ) {
			$text = $link;
		}

		$nt = Title::newFromText( $link );
		if ( !$nt ) {
			$out .= $prefix . '[[' . $segment;
			continue;
		}

		# If the article does not exist, check the other language
		# variants of the link
		if ( $nt->getArticleID() == 0 ) {
			$variants = $wgContLang->getVariants();
			$varnt = false;
			if ( sizeof( $variants ) > 1 ) {
				foreach ( $variants as $variant ) {
					if ( $variant == $wgContLang->getPreferredVariant() ) {
						continue;
					}
					$varlink = $wgContLang->autoConvert( $link, $variant );
					$varnt = Title::newFromText( $varlink );
					if ( $varnt && $varnt->getArticleID() > 0 ) {
						break;
					}
				}
			}
			if ( $varnt && $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varlink;
			}
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterWiki();

		# Link not escaped by : , create the various objects
		if ( $noforce ) {

			# Interwikis
			if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgContLang->getLanguageName( $iw ) ) {
				array_push( $this->mOutput->mLanguageLinks, $nt->getFullText() );
				# Drop the surrounding text when it is only whitespace
				$around = $prefix . $trail ;
				if ( trim( $around ) != '' ) {
					$out .= $around;
				}
				continue;
			}

			if ( $ns == NS_IMAGE ) {
				$out .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail;
				$wgLinkCache->addImageLinkObj( $nt );
				continue;
			}

			if ( $ns == NS_CATEGORY ) {
				$catText = $nt->getText() ;
				$catTitle = Title::newFromText ( Namespace::getCanonicalName(NS_CATEGORY).':'.$catText ) ;

				$wgLinkCache->suspend(); # Don't save in links/brokenlinks
				$savedColour = $sk->postParseLinkColour();
				$sk->postParseLinkColour( false );
				$catLink = $sk->makeLinkObj( $catTitle, $catText, '', '' , $prefix );
				$sk->postParseLinkColour( $savedColour );
				$wgLinkCache->resume();

				# Sort key: the link text if one was given, otherwise the
				# current page's name (unprefixed inside the category namespace)
				if ( !$wasblank ) {
					$sortkey = $text;
				} elseif ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
					$sortkey = $this->mTitle->getText();
				} else {
					$sortkey = $this->mTitle->getPrefixedText();
				}
				$wgLinkCache->addCategoryLinkObj( $nt, $sortkey );
				$this->mOutput->mCategoryLinks[] = $catLink ;
				$out .= $prefix . $trail ;
				continue;
			}
		}

		$isSelfLink = ( $nt->getPrefixedText() === $this->mTitle->getPrefixedText() )
			&& ( strpos( $link, '#' ) === FALSE );
		if ( $isSelfLink ) {
			# Self-links are handled specially; generally de-link and change to bold.
			$out .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		if ( $ns == NS_MEDIA ) {
			$out .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail;
			$wgLinkCache->addImageLinkObj( $nt );
			continue;
		}
		if ( $ns == NS_SPECIAL ) {
			$out .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail );
			continue;
		}
		$out .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix );
	}
	wfProfileOut( $fname );
	return $out;
}
1254
/**
 * Handle link to subpage if necessary.
 *
 * Valid link forms:
 *   Foobar    -- normal
 *   :Foobar   -- override special treatment of prefix (images, language links)
 *   /Foobar   -- convert to CurrentPage/Foobar
 *   /Foobar/  -- convert to CurrentPage/Foobar, strip the initial / from text
 *
 * The conversion only happens when the current title's namespace is
 * enabled in $wgNamespacesWithSubpages; otherwise the target is returned
 * as given (minus the surrounding slashes in the /Foobar/ form).
 *
 * @param $target string the source of the link
 * @param &$text the link text; when it is the empty string and a subpage
 *               link is made, it is set to the target
 * @return string the full name of the link
 * @access private
 */
function maybeDoSubpageLink($target, &$text) {
	global $wgNamespacesWithSubpages;

	$fname = 'Parser::maybeDoSubpageLink';
	wfProfileIn( $fname );

	# Look at the first character. substr() is used rather than the
	# $target{0} offset syntax: the curly-brace form is no longer accepted
	# by current PHP, and substr() is also safe on an empty target.
	if( substr( $target, 0, 1 ) === '/' ) {
		# / at end means we don't want the slash to be shown
		if(substr($target,-1,1)=='/') {
			$target=substr($target,1,-1);
			$noslash=$target;
		} else {
			$noslash=substr($target,1);
		}

		# Some namespaces don't allow subpages
		if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) {
			# subpages allowed here
			$ret = $this->mTitle->getPrefixedText(). '/' . trim($noslash);
			if( '' === $text ) {
				$text = $target;
			} # this might be changed for ugliness reasons
		} else {
			# no subpage allowed, use standard link
			$ret = $target;
		}
	} else {
		# no subpage
		$ret = $target;
	}

	wfProfileOut( $fname );
	return $ret;
}
1301
1302 /**#@+
1303 * Used by doBlockLevels()
1304 * @access private
1305 */
/* private */ function closeParagraph() {
	# Closing tag for the section currently open, if there is one
	$closing = ( '' != $this->mLastSection )
		? '</' . $this->mLastSection . ">\n"
		: '';

	# Reset the block state: no open section, not inside <pre>
	$this->mInPre = false;
	$this->mLastSection = '';
	return $closing;
}
# getCommon() returns the length of the longest common prefix of both
# arguments, i.e. the number of leading bytes in which they agree.
# Returns 0 when either string is empty.
#
/* private */ function getCommon( $st1, $st2 ) {
	$fl = strlen( $st1 );
	$shorter = strlen( $st2 );
	if ( $fl < $shorter ) { $shorter = $fl; }

	# Square-bracket offsets: the $str{$i} form is a parse error on current PHP
	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) { break; }
	}
	return $i;
}
# These next three functions open, continue, and close the list
# element appropriate to the prefix character passed into them.
#
/* private */ function openList( $char ) {
	# Any open paragraph is closed first
	$html = $this->closeParagraph();

	switch ( $char ) {
		case '*':
			return $html . '<ul><li>';
		case '#':
			return $html . '<ol><li>';
		case ':':
			return $html . '<dl><dd>';
		case ';':
			# A definition term is now open
			$this->mDTopen = true;
			return $html . '<dl><dt>';
	}
	# Unknown prefix: the marker replaces the paragraph close
	return '<!-- ERR 1 -->';
}
1345
/* private */ function nextItem( $char ) {
	switch ( $char ) {
		case '*':
		case '#':
			return '</li><li>';

		case ':':
		case ';':
			# Close whichever definition-list element is currently open
			$close = $this->mDTopen ? '</dt>' : '</dd>';
			# ';' starts a term, ':' starts a definition
			$this->mDTopen = ( ';' == $char );
			return $close . ( $this->mDTopen ? '<dt>' : '<dd>' );
	}
	return '<!-- ERR 2 -->';
}
1361
/* private */ function closeList( $char ) {
	switch ( $char ) {
		case '*':
			return '</li></ul>' . "\n";
		case '#':
			return '</li></ol>' . "\n";
		case ':':
			# The list may end while a term, rather than a definition, is open
			if ( $this->mDTopen ) {
				$this->mDTopen = false;
				return '</dt></dl>' . "\n";
			}
			return '</dd></dl>' . "\n";
	}
	# Unknown prefix: marker only, no trailing newline
	return '<!-- ERR 3 -->';
}
1376 /**#@-*/
1377
/**
 * Make lists from lines starting with ':', '*', '#', etc., and wrap the
 * remaining lines in <p> / <pre> sections.
 *
 * The text is processed line by line. The run of list characters at the
 * start of each line is compared with the previous line's to decide which
 * lists to open, continue or close. Lines without a list prefix go through
 * paragraph handling unless a block-level HTML tag is detected on them.
 *
 * @param string $text the text to process
 * @param bool $linestart when false, the first line is copied to the
 *             output untouched instead of being treated as a line start
 * @access private
 * @return string the lists rendered as HTML
 */
function doBlockLevels( $text, $linestart ) {
	$fname = 'Parser::doBlockLevels';
	wfProfileIn( $fname );

	# Parsing through the text line by line. The main thing
	# happening here is handling of block-level elements p, pre,
	# and making lists from lines starting with * # : etc.
	#
	$textLines = explode( "\n", $text );

	$lastPrefix = $output = '';
	$this->mDTopen = $inBlockElem = false;
	$prefixLength = 0;
	# Pending paragraph markup (a string) that is only written once a
	# following line decides how the paragraph break is rendered;
	# false when nothing is pending.
	$paragraphStack = false;

	if ( !$linestart ) {
		$output .= array_shift( $textLines );
	}
	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match('/<\\/pre/i', $oLine );
		$preOpenMatch = preg_match('/<pre/i', $oLine );
		if ( !$this->mInPre ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' both belong to a <dl>, so they are treated as
			# the same level when prefixes are compared
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );
			$paragraphStack = false;

			if ( substr( $pref, -1 ) == ';') {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				if ($this->findColonNoLinks($t, $term, $t2) !== false) {
					$t = $t2;
					$output .= $term . $this->nextItem( ':' );
				}
			}
		} elseif( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
			$paragraphStack = false;

			# Close the levels of the previous prefix that are not shared
			# (string offsets use [] — the {} form is gone from current PHP)
			while( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			# Open the levels that are new on this line
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					if ($this->findColonNoLinks($t, $term, $t2) !== false) {
						$t = $t2;
						$output .= $term . $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}
		if( 0 == $prefixLength ) {
			# No prefix (not in list)--go to paragraph mode
			$uniq_prefix = UNIQ_PREFIX;
			// XXX: use a stack for nestable elements like span, table and div
			$openmatch = preg_match('/(<table|<blockquote|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|<p|<ul|<li|<\\/tr|<\\/td|<\\/th)/i', $t );
			$closematch = preg_match(
				'/(<\\/table|<\\/blockquote|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'.
				'<td|<th|<div|<\\/div|<hr|<\\/pre|<\\/p|'.$uniq_prefix.'-pre|<\\/li|<\\/ul)/i', $t );
			if ( $openmatch or $closematch ) {
				$paragraphStack = false;
				$output .= $this->closeParagraph();
				if($preOpenMatch and !$preCloseMatch) {
					$this->mInPre = true;
				}
				if ( $closematch ) {
					$inBlockElem = false;
				} else {
					$inBlockElem = true;
				}
			} else if ( !$inBlockElem && !$this->mInPre ) {
				# substr() rather than $t{0}: the line may be empty, and
				# reading offset 0 of an empty string raises an error
				if ( ' ' == substr( $t, 0, 1 ) and ( $this->mLastSection == 'pre' or trim($t) != '' ) ) {
					// pre
					if ($this->mLastSection != 'pre') {
						$paragraphStack = false;
						$output .= $this->closeParagraph().'<pre>';
						$this->mLastSection = 'pre';
					}
					$t = substr( $t, 1 );
				} else {
					// paragraph
					if ( '' == trim($t) ) {
						if ( $paragraphStack ) {
							# Second blank line in a row
							$output .= $paragraphStack.'<br />';
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else {
							# First blank line: remember the break but do not write it yet
							if ($this->mLastSection != 'p' ) {
								$output .= $this->closeParagraph();
								$this->mLastSection = '';
								$paragraphStack = '<p>';
							} else {
								$paragraphStack = '</p><p>';
							}
						}
					} else {
						if ( $paragraphStack ) {
							$output .= $paragraphStack;
							$paragraphStack = false;
							$this->mLastSection = 'p';
						} else if ($this->mLastSection != 'p') {
							$output .= $this->closeParagraph().'<p>';
							$this->mLastSection = 'p';
						}
					}
				}
			}
		}
		# While paragraph markup is pending the (blank) line itself is not written
		if ($paragraphStack === false) {
			$output .= $t."\n";
		}
	}
	# Close whatever lists the final line left open
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	wfProfileOut( $fname );
	return $output;
}
1538
/**
 * Split up a string on ':', ignoring any occurrences where an <a or <span
 * opened before the colon has not been closed yet.
 *
 * @param $str string the string to split
 * @param &$before string set to everything before the ':' last examined
 * @param &$after string set to everything after the ':' last examined
 * @return mixed the position of the ':', or false if none found
 */
function findColonNoLinks($str, &$before, &$after) {
	# I wonder if we should make this count all tags, not just <a>
	# and <span>. That would prevent us from matching a ':' that
	# comes in the middle of italics other such formatting....
	# -- Wil
	$fname = 'Parser::findColonNoLinks';
	wfProfileIn( $fname );

	$offset = 0;
	while ( ( $colon = strpos( $str, ':', $offset ) ) !== false ) {
		$before = substr( $str, 0, $colon );
		$after = substr( $str, $colon + 1 );

		# Compare opening and closing tag counts in the text before the colon
		$openAnchors = substr_count( $before, '<a' );
		$openSpans = substr_count( $before, '<span' );
		$closedAnchors = substr_count( $before, '</a>' );
		$closedSpans = substr_count( $before, '</span>' );

		if ( $openAnchors <= $closedAnchors and $openSpans <= $closedSpans ) {
			# Tags are balanced before ':'; ok
			break;
		}
		# This ':' is within an <a> or <span> pair; look for the next one
		$offset = $colon + 1;
	}

	wfProfileOut( $fname );
	return $colon;
}
1578
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * @param int $index one of the MAG_* variable identifiers
 * @return mixed the current value, or NULL for an identifier not handled here
 * @access private
 */
function getVariableValue( $index ) {
	global $wgContLang, $wgSitename, $wgServer;

	$value = NULL;
	switch ( $index ) {
		case MAG_CURRENTMONTH:
			$value = $wgContLang->formatNum( date( 'm' ) );
			break;
		case MAG_CURRENTMONTHNAME:
			$value = $wgContLang->getMonthName( date('n') );
			break;
		case MAG_CURRENTMONTHNAMEGEN:
			$value = $wgContLang->getMonthNameGen( date('n') );
			break;
		case MAG_CURRENTDAY:
			$value = $wgContLang->formatNum( date('j') );
			break;
		case MAG_PAGENAME:
			$value = $this->mTitle->getText();
			break;
		case MAG_PAGENAMEE:
			$value = $this->mTitle->getPartialURL();
			break;
		case MAG_NAMESPACE:
			# Localised namespace name rather than the canonical one (patch by Dori)
			$value = $wgContLang->getNsText( $this->mTitle->getNamespace() );
			break;
		case MAG_CURRENTDAYNAME:
			# date('w') counts from 0; one is added before the lookup
			$value = $wgContLang->getWeekdayName( date('w')+1 );
			break;
		case MAG_CURRENTYEAR:
			$value = $wgContLang->formatNum( date( 'Y' ) );
			break;
		case MAG_CURRENTTIME:
			$value = $wgContLang->time( wfTimestampNow(), false );
			break;
		case MAG_NUMBEROFARTICLES:
			$value = $wgContLang->formatNum( wfNumberOfArticles() );
			break;
		case MAG_SITENAME:
			$value = $wgSitename;
			break;
		case MAG_SERVER:
			$value = $wgServer;
			break;
	}
	return $value;
}
1619
/**
 * Initialise the magic variables (like CURRENTMONTHNAME): rebuild
 * $this->mVariables from every identifier listed in $wgVariableIDs.
 *
 * @access private
 */
function initialiseVariables() {
	global $wgVariableIDs;
	$fname = 'Parser::initialiseVariables';
	wfProfileIn( $fname );

	$this->mVariables = array();
	foreach ( $wgVariableIDs as $variableId ) {
		# Each magic word adds itself to the table with its current value
		$magicWord =& MagicWord::get( $variableId );
		$magicWord->addToArray(
			$this->mVariables,
			$this->getVariableValue( $variableId )
		);
	}

	wfProfileOut( $fname );
}
1636
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  OT_WIKI: only {{subst:}} templates
 *  OT_MSG: only magic variables
 *  OT_HTML: all templates and magic variables
 *
 * Text longer than MAX_INCLUDE_SIZE is returned untouched.
 *
 * @param string $text The text to transform
 * @param array $args Key-value pairs representing template parameters to substitute
 * @return string the transformed text
 * @access private
 */
function replaceVariables( $text, $args = array() ) {
	# Prevent too big inclusions
	if ( strlen( $text ) > MAX_INCLUDE_SIZE ) {
		return $text;
	}

	$fname = 'Parser::replaceVariables';
	wfProfileIn( $fname );

	$legal = Title::legalChars();

	# This function is called recursively; the arguments of the call in
	# progress sit on top of a stack
	array_push( $this->mArgStack, $args );

	# PHP global rebinding syntax is a bit weird, need to use the GLOBALS array
	$GLOBALS['wgCurParser'] =& $this;

	# Variable substitution
	$variablePattern = '/{{([' . $legal . ']*?)}}/';
	$text = preg_replace_callback( $variablePattern, 'wfVariableSubstitution', $text );

	# Argument substitution
	$outputType = $this->mOutputType;
	if ( $outputType == OT_HTML || $outputType == OT_WIKI ) {
		$argumentPattern = '/{{{([' . $legal . ']*?)}}}/';
		$text = preg_replace_callback( $argumentPattern, 'wfArgSubstitution', $text );
	}

	# Template substitution
	$templatePattern = '/(\\n|{)?{{(['.$legal.']*)(\\|.*?|)}}/s';
	$text = preg_replace_callback( $templatePattern, 'wfBraceSubstitution', $text );

	array_pop( $this->mArgStack );

	wfProfileOut( $fname );
	return $text;
}
1685
/**
 * Replace magic variables
 *
 * @param array $matches regex match: [0] is the whole {{...}} text, [1] the name inside the braces
 * @return string the variable's value, or the original text when no substitution is made
 * @access private
 */
function variableSubstitution( $matches ) {
	if ( !$this->mVariables ) {
		$this->initialiseVariables();
	}

	$substitute = true;
	if ( $this->mOutputType == OT_WIKI ) {
		# Do only magic variables prefixed by SUBST
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( !$mwSubst->matchStartAndRemove( $matches[1] ) ) {
			$substitute = false;
		}
		# Note that if we don't substitute the variable below,
		# we don't remove the {{subst:}} magic word, in case
		# it is a template rather than a magic variable.
	}

	if ( $substitute && array_key_exists( $matches[1], $this->mVariables ) ) {
		$value = $this->mVariables[$matches[1]];
		$this->mOutput->mContainsOldMagic = true;
		return $value;
	}
	# Not a known variable (or skipped): leave the text as it was
	return $matches[0];
}
1712
/**
 * Split template arguments.
 *
 * The string is split on '|'. A piece whose '[[' and ']]' counts differ is
 * merged with the pieces that follow (re-inserting the '|') until the
 * counts agree, because that '|' belongs to link syntax and not to the
 * template parameter syntax. A final argument that never balances is
 * returned as it stands.
 *
 * @param string $argsString the arguments including the leading '|', or '' for none
 * @return array the arguments, indexed from 0
 */
function getTemplateArgs( $argsString ) {
	if ( $argsString === '' ) {
		return array();
	}

	$pieces = explode( '|', substr( $argsString, 1 ) );

	# Single pass with a running bracket balance. '[[' and ']]' cannot span
	# a '|', so per-piece counts add up to the count of the merged argument;
	# this avoids re-scanning and re-splicing for every merge.
	$args = array();
	$current = '';
	$imbalance = 0;   # '[[' count minus ']]' count of the argument being built
	$merging = false; # true while $current is an incomplete argument
	foreach ( $pieces as $piece ) {
		if ( $merging ) {
			$current .= '|' . $piece;
		} else {
			$current = $piece;
		}
		$imbalance += substr_count( $piece, '[[' ) - substr_count( $piece, ']]' );

		if ( $imbalance != 0 ) {
			$merging = true;
		} else {
			$args[] = $current;
			$merging = false;
		}
	}
	# Out of pieces while still unbalanced: keep what was collected
	if ( $merging ) {
		$args[] = $current;
	}

	return $args;
}
1737
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * Handles, in this order: {{{...}}} left for argument substitution,
 * SUBST, MSGNW/MSG/INT, NS, LOCALURL/LOCALURLE, GRAMMAR, templates
 * already in the template cache, and finally templates loaded from the
 * database.
 *
 * @param array $matches The parts of the template
 *  $matches[0]: the complete matched text
 *  $matches[1]: a newline or '{' directly before the '{{', if any
 *  $matches[2]: the title, i.e. the part before the |
 *  $matches[3]: the parameters (including a leading |), if any
 * @return string the text of the template, or $matches[0] unchanged when
 *         nothing could be substituted
 * @access private
 */
function braceSubstitution( $matches ) {
	global $wgLinkCache, $wgContLang;
	$found = false;
	$nowiki = false;
	$noparse = false;

	$title = NULL;

	# Need to know if the template comes at the start of a line,
	# to treat the beginning of the template like the beginning
	# of a line for tables and block-level elements.
	$linestart = $matches[1];

	# $part1 is the bit before the first |, and must contain only title characters
	# $args is a list of arguments, starting from index 0, not including $part1

	$part1 = $matches[2];
	# If the third subpattern matched anything, it will start with |

	$args = $this->getTemplateArgs($matches[3]);
	$argc = count( $args );

	# Don't parse {{{}}} because that's only for template arguments
	if ( $linestart === '{' ) {
		$text = $matches[0];
		$found = true;
		$noparse = true;
	}

	# SUBST
	if ( !$found ) {
		$mwSubst =& MagicWord::get( MAG_SUBST );
		if ( $mwSubst->matchStartAndRemove( $part1 ) xor ($this->mOutputType == OT_WIKI) ) {
			# One of two possibilities is true:
			# 1) Found SUBST but not in the PST phase
			# 2) Didn't find SUBST and in the PST phase
			# In either case, return without further processing
			$text = $matches[0];
			$found = true;
			$noparse = true;
		}
	}

	# MSG, MSGNW and INT
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw =& MagicWord::get( MAG_MSGNW );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg =& MagicWord::get( MAG_MSG );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check if it is an internal message
		$mwInt =& MagicWord::get( MAG_INT );
		if ( $mwInt->matchStartAndRemove( $part1 ) ) {
			if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) {
				$text = $linestart . wfMsgReal( $part1, $args, true );
				$found = true;
			}
		}
	}

	# NS
	if ( !$found ) {
		# Check for NS: (namespace expansion)
		$mwNs = MagicWord::get( MAG_NS );
		if ( $mwNs->matchStartAndRemove( $part1 ) ) {
			if ( intval( $part1 ) ) {
				$text = $linestart . $wgContLang->getNsText( intval( $part1 ) );
				$found = true;
			} else {
				$index = Namespace::getCanonicalIndex( strtolower( $part1 ) );
				if ( !is_null( $index ) ) {
					$text = $linestart . $wgContLang->getNsText( $index );
					$found = true;
				}
			}
		}
	}

	# LOCALURL and LOCALURLE
	if ( !$found ) {
		$mwLocal = MagicWord::get( MAG_LOCALURL );
		$mwLocalE = MagicWord::get( MAG_LOCALURLE );

		if ( $mwLocal->matchStartAndRemove( $part1 ) ) {
			$func = 'getLocalURL';
		} elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) {
			$func = 'escapeLocalURL';
		} else {
			$func = '';
		}

		if ( $func !== '' ) {
			$title = Title::newFromText( $part1 );
			if ( !is_null( $title ) ) {
				if ( $argc > 0 ) {
					$text = $linestart . $title->$func( $args[0] );
				} else {
					$text = $linestart . $title->$func();
				}
				$found = true;
			}
		}
	}

	# GRAMMAR
	if ( !$found && $argc == 1 ) {
		$mwGrammar =& MagicWord::get( MAG_GRAMMAR );
		if ( $mwGrammar->matchStartAndRemove( $part1 ) ) {
			$text = $linestart . $wgContLang->convertGrammar( $args[0], $part1 );
			$found = true;
		}
	}

	# Template table test

	# Did we encounter this template already? If yes, it is in the cache
	# and we need to check for loops.
	if ( !$found && isset( $this->mTemplates[$part1] ) ) {
		# set $text to cached message.
		$text = $linestart . $this->mTemplates[$part1];
		$found = true;

		# Infinite loop test
		if ( isset( $this->mTemplatePath[$part1] ) ) {
			$noparse = true;
			$found = true;
			$text .= '<!-- WARNING: template loop detected -->';
		}
	}

	# Load from database
	$itcamefromthedatabase = false;
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# The by-reference argument must be a real variable: passing the
		# expression $subpage='' hands the callee a temporary, so the
		# subpage text never came back.
		$subpage = '';
		$part1 = $this->maybeDoSubpageLink( $part1, $subpage );
		if ($subpage !== '') {
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( !is_null( $title ) && !$title->isExternal() ) {
			# Check for excessive inclusion
			$dbk = $title->getPrefixedDBkey();
			if ( $this->incrementIncludeCount( $dbk ) ) {
				$article = new Article( $title );
				$articleContent = $article->getContentWithoutUsingSoManyDamnGlobals();
				if ( $articleContent !== false ) {
					$found = true;
					$text = $articleContent;
					$itcamefromthedatabase = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( $this->mOutputType == OT_HTML && !$found ) {
				$text = '[['.$title->getPrefixedText().']]';
				$found = true;
			}

			# Template cache array insertion. Only cache when there is
			# something to cache, and cache it WITHOUT $linestart: the
			# cache-hit branch above prepends the line start of the call
			# being expanded, so storing the prefix would apply the first
			# call's line start to every later use.
			if ( $found ) {
				$this->mTemplates[$part1] = $text;
				$text = $linestart . $text;
			}
		}
	}

	# Recursive parsing, escaping and link table handling
	# Only for HTML output
	if ( $nowiki && $found && $this->mOutputType == OT_HTML ) {
		$text = wfEscapeWikiText( $text );
	} elseif ( ($this->mOutputType == OT_HTML || $this->mOutputType == OT_WIKI) && $found && !$noparse) {
		# Clean up argument array: "name=value" arguments are stored under
		# their trimmed name, all others are numbered from 1
		$assocArgs = array();
		$index = 1;
		foreach( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				$assocArgs[$index++] = $arg;
			} else {
				$name = trim( substr( $arg, 0, $eqpos ) );
				$value = trim( substr( $arg, $eqpos+1 ) );
				$assocArgs[$name] = $value;
			}
		}

		# Add a new element to the template recursion path
		$this->mTemplatePath[$part1] = 1;

		$text = $this->strip( $text, $this->mStripState );
		$text = $this->removeHTMLtags( $text );
		$text = $this->replaceVariables( $text, $assocArgs );

		# Resume the link cache and register the inclusion as a link
		if ( $this->mOutputType == OT_HTML && !is_null( $title ) ) {
			$wgLinkCache->addLinkObj( $title );
		}

		# If the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# ("\n" must be double-quoted; '\n' is a backslash and an 'n'.)
		if ($linestart !== "\n" && preg_match('/^({\\||:|;|#|\*)/', $text)) {
			$text = "\n" . $text;
		}
	}

	# Empties the template path
	$this->mTemplatePath = array();
	if ( !$found ) {
		return $matches[0];
	}

	# replace ==section headers==
	# XXX this needs to go away once we have a better parser.
	if ( $this->mOutputType != OT_WIKI && $itcamefromthedatabase ) {
		if( !is_null( $title ) )
			$encodedname = base64_encode($title->getPrefixedDBkey());
		else
			$encodedname = base64_encode("");
		# Odd entries of $m are the heading lines, even entries the text between them
		$m = preg_split('/(^={1,6}.*?={1,6}\s*?$)/m', $text, -1,
			PREG_SPLIT_DELIM_CAPTURE);
		$text = '';
		$nsec = 0;
		for( $i = 0; $i < count($m); $i += 2 ) {
			$text .= $m[$i];
			if (!isset($m[$i + 1]) || $m[$i + 1] == "") continue;
			$hl = $m[$i + 1];
			# A heading that already carries a marker is left alone
			if( strstr($hl, "<!--MWTEMPLATESECTION") ) {
				$text .= $hl;
				continue;
			}
			# Tag the heading with the template name and section number
			preg_match('/^(={1,6})(.*?)(={1,6})\s*?$/m', $hl, $m2);
			$text .= $m2[1] . $m2[2] . "<!--MWTEMPLATESECTION="
				. $encodedname . "&" . base64_encode("$nsec") . "-->" . $m2[3];

			$nsec++;
		}
	}

	return $text;
}
2002
/**
 * Triple brace replacement -- used for template arguments
 *
 * @param array $matches regex match: [0] is the whole {{{...}}} text, [1] the argument name
 * @return string the argument's value from the top of the argument stack,
 *         or the original text when no such argument was passed
 * @access private
 */
function argSubstitution( $matches ) {
	$name = trim( $matches[1] );
	# Arguments of the template currently being expanded
	$currentArgs = end( $this->mArgStack );

	return array_key_exists( $name, $currentArgs )
		? $currentArgs[$name]
		: $matches[0];
}
2018
/**
 * Count one more inclusion of the given entity and report whether it is
 * still within MAX_INCLUDE_REPEAT.
 *
 * @param string $dbk key identifying the included entity
 * @return bool true if the function is allowed to include this entity
 * @access private
 */
function incrementIncludeCount( $dbk ) {
	if ( array_key_exists( $dbk, $this->mIncludeCount ) ) {
		$this->mIncludeCount[$dbk]++;
	} else {
		# First inclusion of this entity
		$this->mIncludeCount[$dbk] = 1;
	}
	return $this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT;
}
2033
2034
/**
 * Cleans up HTML, removes dangerous tags and attributes, and
 * removes HTML comments.
 *
 * The text is split on '<'. Every fragment that looks like a tag whose
 * name is on the whitelist is re-emitted with its attributes passed
 * through fixTagAttributes(); every other fragment is emitted with its
 * '<' and '>' escaped. When $wgUserHtml is off the whitelist is empty,
 * so every tag is escaped.
 *
 * When $wgUseTidy is off, a tag stack is kept so that badly nested or
 * unopened closing tags are escaped and unclosed tags are closed at the
 * end of the text. When $wgUseTidy is on no balancing is done here.
 *
 * @param string $text text to clean
 * @return string cleaned text
 * @access private
 */
function removeHTMLtags( $text ) {
	global $wgUseTidy, $wgUserHtml;
	$fname = 'Parser::removeHTMLtags';
	wfProfileIn( $fname );

	if( $wgUserHtml ) {
		$htmlpairs = array( # Tags that must be closed
			'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
			'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
			'strike', 'strong', 'tt', 'var', 'div', 'center',
			'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
			'ruby', 'rt' , 'rb' , 'rp', 'p'
		);
		$htmlsingle = array(
			'br', 'hr', 'li', 'dt', 'dd'
		);
		$htmlnest = array( # Tags that can be nested--??
			'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
			'dl', 'font', 'big', 'small', 'sub', 'sup'
		);
		$tabletags = array( # Can only appear inside table
			'td', 'th', 'tr'
		);
	} else {
		$htmlpairs = array();
		$htmlsingle = array();
		$htmlnest = array();
		$tabletags = array();
	}

	# Table tags are never pushed on the tag stack, so they are handled
	# like the other "single" tags from here on.
	$htmlsingle = array_merge( $tabletags, $htmlsingle );
	$htmlelements = array_merge( $htmlsingle, $htmlpairs );

	# NOTE(review): the result is not used in this function. The call is
	# kept because getHTMLattrs() is defined elsewhere and it cannot be
	# verified from here that it is free of side effects.
	$htmlattrs = $this->getHTMLattrs();

	# Remove HTML comments
	$text = $this->removeHTMLcomments( $text );

	# Groups: 1 = optional slash, 2 = tag name, 3 = attributes,
	# 4 = closing bracket, 5 = text up to the next '<'
	$tagPattern = '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/';

	$bits = explode( '<', $text );
	$text = array_shift( $bits );
	if( !$wgUseTidy ) {
		$tagstack = array();
		$tablestack = array();
		foreach ( $bits as $x ) {
			# Fragments that are not tag-shaped fall through to the
			# escaping at the bottom of the loop; checking the match
			# result replaces the former error_reporting() juggling.
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
				$t = strtolower( $t );
				if ( in_array( $t, $htmlelements ) ) {
					$badtag = 0;
					# Check our stack
					if ( $slash ) {
						# Closing a tag...
						if ( !in_array( $t, $htmlsingle ) ) {
							$ot = is_array( $tagstack ) ? array_pop( $tagstack ) : null;
							if ( $ot != $t ) {
								# Not the innermost open tag: put back
								# what was popped (if anything) and
								# treat the closing tag as text.
								if ( !is_null( $ot ) ) {
									array_push( $tagstack, $ot );
								}
								$badtag = 1;
							}
						}
						if ( !$badtag ) {
							if ( $t == 'table' ) {
								# Leaving a table: restore the stack that
								# was saved when the table was opened
								$tagstack = array_pop( $tablestack );
							}
							$newparams = '';
						}
					} else {
						# Keep track for later
						if ( in_array( $t, $tabletags ) &&
						  !in_array( 'table', $tagstack ) ) {
							$badtag = 1;
						} else if ( in_array( $t, $tagstack ) &&
						  !in_array( $t, $htmlnest ) ) {
							$badtag = 1;
						} else if ( !in_array( $t, $htmlsingle ) ) {
							if ( $t == 'table' ) {
								# Each table gets a fresh stack of its own
								array_push( $tablestack, $tagstack );
								$tagstack = array();
							}
							array_push( $tagstack, $t );
						}
						# Strip non-approved attributes from the tag
						$newparams = $this->fixTagAttributes( $params );
					}
					if ( !$badtag ) {
						$rest = str_replace( '>', '&gt;', $rest );
						$text .= "<$slash$t $newparams$brace$rest";
						continue;
					}
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
		# Close off any remaining tags
		while ( is_array( $tagstack ) && ( $t = array_pop( $tagstack ) ) ) {
			$text .= "</$t>\n";
			if ( $t == 'table' ) {
				$tagstack = array_pop( $tablestack );
			}
		}
	} else {
		# this might be possible using tidy itself
		foreach ( $bits as $x ) {
			if ( preg_match( $tagPattern, $x, $regs ) ) {
				list( , $slash, $t, $params, $brace, $rest ) = $regs;
				$t = strtolower( $t );
				if ( in_array( $t, $htmlelements ) ) {
					$newparams = $this->fixTagAttributes( $params );
					$rest = str_replace( '>', '&gt;', $rest );
					$text .= "<$slash$t $newparams$brace$rest";
					continue;
				}
			}
			$text .= '&lt;' . str_replace( '>', '&gt;', $x );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2154
/**
 * Strip every complete HTML comment ('<!--' up to and including '-->')
 * from the text.
 *
 * A comment that sits alone on its line (only spaces between it and the
 * surrounding newlines) is removed together with those spaces and one of
 * the two newlines, so that no blank line is left behind. Processing
 * stops at the first comment that is never terminated; it and anything
 * after it is left untouched.
 *
 * @param string $text
 * @return string text without the comments
 * @access private
 */
function removeHTMLcomments( $text ) {
	$fname = 'Parser::removeHTMLcomments';
	wfProfileIn( $fname );
	for ( ;; ) {
		$open = strpos( $text, '<!--' );
		if ( $open === false ) {
			break;
		}
		$close = strpos( $text, '-->', $open + 4 );
		if ( $close === false ) {
			# Unterminated comment; bail out
			break;
		}
		# Make $close point just past the terminator
		$close += 3;

		# Grow a window around the comment: one character to the left,
		# then any run of spaces on either side.
		$from = max( $open - 1, 0 );
		$span = $close - $from;
		while ( $from > 0 && substr( $text, $from, 1 ) === ' ' ) {
			$from--;
			$span++;
		}
		while ( substr( $text, $from + $span, 1 ) === ' ' ) {
			$span++;
		}

		$newlineBefore = ( substr( $text, $from, 1 ) === "\n" );
		$newlineAfter = ( substr( $text, $from + $span, 1 ) === "\n" );
		if ( $newlineBefore && $newlineAfter ) {
			# The comment has a line to itself: drop the window plus
			# the trailing newline and leave a single newline.
			$text = substr_replace( $text, "\n", $from, $span + 1 );
		} else {
			# Inline comment: remove just the comment.
			$text = substr_replace( $text, '', $open, $close - $open );
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2198
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for logged in users who have enabled the option
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * The magic words __NOEDITSECTION__, __NOTOC__ and __FORCETOC__ are removed
 * from the text; __TOC__ is replaced by the table of contents.
 *
 * @param string $text   rendered HTML of the page
 * @param bool   $isMain when false, the TOC is not inserted before the
 *                       first heading (it is still inserted at __TOC__)
 * @return string the text with rebuilt headings and, if applicable, the TOC
 * @access private
 */
function formatHeadings( $text, $isMain=true ) {
	global $wgInputEncoding, $wgMaxTocLevel, $wgContLang, $wgLinkHolders;

	$doNumberHeadings = $this->mOptions->getNumberHeadings();
	$doShowToc = $this->mOptions->getShowToc();
	$forceTocHere = false;
	if( !$this->mTitle->userCanEdit() ) {
		$showEditLink = 0;
		$rightClickHack = 0;
	} else {
		$showEditLink = $this->mOptions->getEditSection();
		$rightClickHack = $this->mOptions->getEditSectionOnRightClick();
	}

	# Inhibit editsection links if requested in the page
	$esw =& MagicWord::get( MAG_NOEDITSECTION );
	if( $esw->matchAndRemove( $text ) ) {
		$showEditLink = 0;
	}
	# if the string __NOTOC__ (not case-sensitive) occurs in the HTML,
	# do not add TOC
	$mw =& MagicWord::get( MAG_NOTOC );
	if( $mw->matchAndRemove( $text ) ) {
		$doShowToc = 0;
	}

	# never add the TOC to the Main Page. This is an entry page that should not
	# be more than 1-2 screens large anyway
	if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) {
		$doShowToc = 0;
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now.
	# (The pattern is written in two pieces so that the source never contains
	# the character pair that closes a PHP block.)
	$numMatches = preg_match_all( '/<H([1-6])(.*?' . '>)(.*?)<\/H[1-6]>/i', $text, $matches );

	# if there are fewer than 4 headlines in the article, do not show TOC
	if( $numMatches < 4 ) {
		$doShowToc = 0;
	}

	# if the string __TOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC at that place
	$mw =& MagicWord::get( MAG_TOC );
	if ( $mw->match( $text ) ) {
		$doShowToc = 1;
		$forceTocHere = true;
	} else {
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		$mw =& MagicWord::get( MAG_FORCETOC );
		if ( $mw->matchAndRemove( $text ) ) {
			$doShowToc = 1;
		}
	}

	# We need this to perform operations on the HTML
	$sk =& $this->mOptions->getSkin();

	# headline counter
	$headlineCount = 0;
	$sectionCount = 0; # headlineCount excluding template sections

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toclevel = 0;
	$toc = '';
	$full = '';
	$head = array();
	$sublevelCount = array();
	$refers = array();   # anchor text => number of headings using it so far
	$refcount = array(); # headline index => occurrence number of its anchor
	$level = 0;
	$prevlevel = 0;

	# Marker carrying the base64-encoded title and section number of a
	# heading that came from a template
	$templateMarker = "/<!--MWTEMPLATESECTION=([^&]+)&([^_]+)-->/";

	foreach( $matches[3] as $headline ) {
		$istemplate = 0;
		$templatetitle = "";
		$templatesection = 0;

		if ( preg_match( $templateMarker, $headline, $mat ) ) {
			$istemplate = 1;
			$templatetitle = base64_decode( $mat[1] );
			$templatesection = 1 + (int)base64_decode( $mat[2] );
			$headline = preg_replace( $templateMarker, "", $headline );
		}

		$numbering = '';
		if( $level ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];
		if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) {
			# reset when we enter a new level
			$sublevelCount[$level] = 0;
			$toc .= $sk->tocIndent( $level - $prevlevel );
			$toclevel += $level - $prevlevel;
		}
		if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) {
			# reset when we step back a level
			$sublevelCount[$level+1]=0;
			$toc .= $sk->tocUnindent( $prevlevel - $level );
			$toclevel -= $prevlevel - $level;
		}
		# count number of headlines for each level
		if ( isset( $sublevelCount[$level] ) ) {
			$sublevelCount[$level]++;
		} else {
			$sublevelCount[$level] = 1;
		}
		if( $doNumberHeadings || $doShowToc ) {
			$dot = 0;
			for( $i = 1; $i <= $level; $i++ ) {
				if( !empty( $sublevelCount[$i] ) ) {
					if( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $wgContLang->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}
		}

		# The canonized header is a version of the header text safe to use for links
		# Avoid insertion of weird stuff like <math> by expanding the relevant sections.
		# The second call must work on the result of the first one; it used to
		# restart from $headline and thereby threw the unstrip() result away.
		$canonized_headline = $this->unstrip( $headline, $this->mStripState );
		$canonized_headline = $this->unstripNoWiki( $canonized_headline, $this->mStripState );

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Done in a single pass with preg_split instead of an eval-based
		# replacement: even-numbered pieces are literal text, odd-numbered
		# pieces are the captured placeholder numbers.
		$pieces = preg_split( '/<!--LINK ([0-9]*)-->/', $canonized_headline,
			-1, PREG_SPLIT_DELIM_CAPTURE );
		$canonized_headline = '';
		foreach ( $pieces as $idx => $piece ) {
			if ( $idx % 2 == 0 ) {
				$canonized_headline .= $piece;
			} elseif ( $piece !== '' && isset( $wgLinkHolders['texts'][intval( $piece )] ) ) {
				$canonized_headline .= $wgLinkHolders['texts'][intval( $piece )];
			}
			# An unknown or empty placeholder number contributes nothing
		}

		# strip out HTML
		$canonized_headline = preg_replace( '/<.*?' . '>/', '', $canonized_headline );
		$tocline = trim( $canonized_headline );
		$canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) );
		$replacearray = array(
			'%3A' => ':',
			'%' => '.'
		);
		$canonized_headline = str_replace( array_keys( $replacearray ), array_values( $replacearray ), $canonized_headline );

		# count how many in assoc. array so we can track dupes in anchors
		if ( isset( $refers[$canonized_headline] ) ) {
			$refers[$canonized_headline]++;
		} else {
			$refers[$canonized_headline] = 1;
		}
		$refcount[$headlineCount] = $refers[$canonized_headline];

		# Prepend the number to the heading text
		if( $doNumberHeadings || $doShowToc ) {
			$tocline = $numbering . ' ' . $tocline;

			# Don't number the heading if it is the only one (looks silly)
			if( $doNumberHeadings && count( $matches[3] ) > 1 ) {
				# the two are different if the line contains a link
				$headline = $numbering . ' ' . $headline;
			}
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $canonized_headline;
		if( $refcount[$headlineCount] > 1 ) {
			$anchor .= '_' . $refcount[$headlineCount];
		}
		if( $doShowToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
			$toc .= $sk->tocLine( $anchor, $tocline, $toclevel );
		}

		if ( !isset( $head[$headlineCount] ) ) {
			$head[$headlineCount] = '';
		}
		if( $showEditLink && ( !$istemplate || $templatetitle !== "" ) ) {
			if( $istemplate ) {
				$head[$headlineCount] .= $sk->editSectionLinkForOther( $templatetitle, $templatesection );
			} else {
				$head[$headlineCount] .= $sk->editSectionLink( $sectionCount+1 );
			}
		}

		# Add the edit section span
		if( $rightClickHack ) {
			if( $istemplate ) {
				$headline = $sk->editSectionScriptForOther( $templatetitle, $templatesection, $headline );
			} else {
				$headline = $sk->editSectionScript( $sectionCount+1, $headline );
			}
		}

		# give headline the correct <h#> tag
		$head[$headlineCount] .= "<a name=\"$anchor\"></a><h".$level.$matches[2][$headlineCount] .$headline.'</h'.$level.'>';

		$headlineCount++;
		if( !$istemplate ) {
			$sectionCount++;
		}
	}

	if( $doShowToc ) {
		$toc .= $sk->tocUnindent( $toclevel );
		$toc = $sk->tocTable( $toc );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?' . '>.*?<\/H[1-6]>/i', $text );
	$i = 0;

	foreach( $blocks as $block ) {
		# An [edit] link for the top block of text used to be added here
		# ($sk->editSectionLink(0)); it is disabled because it broke block
		# formatting, for example a bullet point in the top line.
		$full .= $block;
		if( $doShowToc && !$i && $isMain && !$forceTocHere ) {
			# Top anchor now in skin
			$full = $full.$toc;
		}

		if( !empty( $head[$i] ) ) {
			$full .= $head[$i];
		}
		$i++;
	}
	if( $forceTocHere ) {
		$mw =& MagicWord::get( MAG_TOC );
		return $mw->replace( $toc, $full );
	} else {
		return $full;
	}
}
2440
/**
 * Return an HTML link for the "ISBN 123456" text.
 *
 * Every occurrence of "ISBN " followed (after optional spaces) by a run of
 * digits, dashes and upper-case letters is turned into a link to
 * Special:Booksources with the dashes removed from the number.
 * Occurrences without such a run are left unchanged.
 *
 * @param string $text text to be processed
 * @return string the text with ISBN links inserted
 * @access private
 */
function magicISBN( $text ) {
	$fname = 'Parser::magicISBN';
	wfProfileIn( $fname );

	# The separator is a literal string, so explode() does the job of the
	# regex-based split(). The leading space guarantees a first chunk even
	# if the text starts with the keyword.
	$a = explode( 'ISBN ', ' '.$text );
	if ( count( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ';

	foreach ( $a as $x ) {
		# Peel off leading spaces; they are put back if no link is made.
		# strspn() also copes with chunks that run out of characters,
		# where per-character indexing used to read past the end.
		$len = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );
		if ( $x === '' ) { # blank isbn
			$text .= "ISBN $blank";
			continue;
		}

		# Take the longest prefix made of valid ISBN characters
		$len = strspn( $x, $valid );
		$isbn = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		$num = str_replace( array( '-', ' ' ), '', $isbn );

		if ( '' === $num ) {
			$text .= "ISBN $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Booksources' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'isbn='.$num ) .
				"\" class=\"internal\">ISBN $isbn</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2488
/**
 * Return an HTML link for the "GEO ..." text.
 *
 * Coordinates written as degrees/minutes/seconds followed by
 * North/South and East/West are first rewritten to the "GEO" notation.
 * Then every "GEO " followed by a run of digits and '.', '+', '-', ':'
 * that contains at least one ':' becomes a link to Special:Geo, with
 * the '+' signs removed from the coordinates passed in the URL.
 *
 * @param string $text text to be processed
 * @return string the text with GEO links inserted
 * @access private
 */
function magicGEO( $text ) {
	$fname = 'Parser::magicGEO';
	wfProfileIn( $fname );

	# This conversion is only for the ~35000 U.S. Census Rambot pages...
	# Northern and eastern hemispheres are positive, southern and western
	# ones negative. (South used to be emitted with a '+', copied from the
	# North patterns.)
	$directions = array( 'N' => 'North', 'S' => 'South', 'E' => 'East', 'W' => 'West' );
	$latSigns = array( 'N' => '+', 'S' => '-' );
	$lonSigns = array( 'W' => '-', 'E' => '+' );
	foreach ( $latSigns as $lat => $latSign ) {
		foreach ( $lonSigns as $lon => $lonSign ) {
			$text = preg_replace(
				"/(\d+)&deg;(\d+)'(\d+)\" {$directions[$lat]}, (\d+)&deg;(\d+)'(\d+)\" {$directions[$lon]}/",
				"(GEO {$latSign}\$1.\$2.\$3:{$lonSign}\$4.\$5.\$6)",
				$text );
		}
	}

	# The separator is a literal string, so explode() does the job of the
	# regex-based split().
	$a = explode( 'GEO ', ' '.$text );
	if ( count( $a ) < 2 ) {
		wfProfileOut( $fname );
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );
	$valid = '0123456789.+-:';

	foreach ( $a as $x ) {
		# Peel off leading spaces; they are put back if no link is made.
		# strspn() also copes with chunks that run out of characters.
		$len = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		# Take the longest prefix made of valid coordinate characters
		$len = strspn( $x, $valid );
		$geo = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		$num = str_replace( array( '+', ' ' ), '', $geo );

		# Latitude and longitude must be separated by a colon
		if ( '' === $num || count( explode( ':', $num, 3 ) ) < 2 ) {
			$text .= "GEO $blank$x";
		} else {
			$titleObj = Title::makeTitle( NS_SPECIAL, 'Geo' );
			$text .= '<a href="' .
				$titleObj->escapeLocalUrl( 'coordinates='.$num ) .
				"\" class=\"internal\">GEO $geo</a>";
			$text .= $x;
		}
	}
	wfProfileOut( $fname );
	return $text;
}
2539
/**
 * Return an HTML link for the "RFC 1234" text.
 *
 * Every "RFC " followed (after optional spaces) by digits becomes an
 * external link built from the 'rfcurl' message, unless the keyword is
 * directly preceded by "[[", in which case the text is left alone so
 * that it can be handled as a normal link.
 *
 * @access private
 * @param string $text text to be processed
 * @return string the text with RFC links inserted
 */
function magicRFC( $text ) {
	$valid = '0123456789';

	# The separator is a literal string, so explode() does the job of the
	# regex-based split().
	$a = explode( 'RFC ', ' '.$text );
	if ( count( $a ) < 2 ) {
		return $text;
	}
	$text = (string)substr( array_shift( $a ), 1 );

	/* Check if the first RFC keyword is preceded by [[.
	 * This has to be done here because the first chunk was shifted off
	 * above and is therefore never seen by the loop.
	 */
	$internal = ( substr( $text, -2 ) == '[[' );

	foreach ( $a as $x ) {
		/* token might be empty if we have RFC RFC 1234 */
		if ( $x == '' ) {
			$text .= 'RFC ';
			continue;
		}

		/* remove and save whitespaces in $blank; strspn() also copes
		 * with chunks that run out of characters */
		$len = strspn( $x, ' ' );
		$blank = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		/* remove and save the rfc number in $rfc */
		$len = strspn( $x, $valid );
		$rfc = (string)substr( $x, 0, $len );
		$x = (string)substr( $x, $len );

		if ( $rfc === '' ) {
			/* no number: put the stripped spaces back */
			$text .= "RFC $blank$x";
		} elseif ( $internal ) {
			/* normal link */
			$text .= "RFC $rfc$x";
		} else {
			/* build the external link */
			$url = wfMsg( 'rfcurl' );
			$url = str_replace( '$1', $rfc, $url );
			$sk =& $this->mOptions->getSkin();
			$la = $sk->getExternalLinkAttributes( $url, 'RFC '.$rfc );
			$text .= "<a href='{$url}'{$la}>RFC {$rfc}</a>{$x}";
		}

		/* Check if the next RFC keyword is preceded by [[ */
		$internal = ( substr( $x, -2 ) == '[[' );
	}
	return $text;
}
2602
/**
 * Transform wiki markup when saving a page: convert \r\n line ends to \n
 * and run the pre-save pass (pstPass2) on everything that strip() leaves
 * in place; the stripped parts are put back afterwards.
 *
 * @param string $text the text to transform
 * @param Title &$title the Title object for the current article
 * @param User &$user the User object describing the current user
 * @param ParserOptions $options parsing options
 * @param bool $clearState whether to clear the parser state first
 * @return string the altered wiki markup
 * @access public
 */
function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) {
	$this->mOptions = $options;
	$this->mTitle =& $title;
	$this->mOutputType = OT_WIKI;

	if ( $clearState ) {
		$this->clearState();
	}

	# Normalise line ends
	$text = str_replace( "\r\n", "\n", $text );

	# A normalisation of <br> variants used to be sketched here as
	# commented-out code; it was never active.

	# Strip state private to this call
	$stripState = false;
	$stripped = $this->strip( $text, $stripState, false );
	$transformed = $this->pstPass2( $stripped, $user );
	$restored = $this->unstrip( $transformed, $stripState );
	return $this->unstripNoWiki( $restored, $stripState );
}
2643
/**
 * Pre-save transform helper function.
 *
 * Runs variable replacement, expands the signature shortcuts
 * (three, four and five tildes), completes "pipe trick" links such as
 * [[|name]] and [[name (context)|]], and trims trailing whitespace.
 *
 * @param string $text text to transform
 * @param User &$user user whose name and 'nickname' option are used for signatures
 * @return string the transformed text
 * @access private
 */
function pstPass2( $text, &$user ) {
	global $wgContLang, $wgLocaltimezone;

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# Signatures
	#
	$n = $user->getName();
	$k = $user->getOption( 'nickname' );
	if ( '' == $k ) {
		$k = $n;
	}
	if ( isset( $wgLocaltimezone ) ) {
		$oldtz = getenv( 'TZ' );
		putenv( 'TZ='.$wgLocaltimezone );
	}
	/* Note: this is an ugly timezone hack for the European wikis */
	$d = $wgContLang->timeanddate( date( 'YmdHis' ), false ) .
		' (' . date( 'T' ) . ')';
	if ( isset( $wgLocaltimezone ) ) {
		# Restore the saved value; a misspelt variable name used to leave
		# TZ empty here.
		putenv( 'TZ='.$oldtz );
	}

	# The tilde runs are literal strings, so plain string replacement is
	# used, longest run first. This also keeps '$' and '\' in a nickname
	# from being interpreted as back-references.
	$userLink = '[[' . $wgContLang->getNsText( NS_USER ) . ":$n|$k]]";
	$text = str_replace( '~~~~~', $d, $text );
	$text = str_replace( '~~~~', "$userLink $d", $text );
	$text = str_replace( '~~~', $userLink, $text );

	# Context links: [[|name]] and [[name (context)|]]
	#
	$tc = "[&;%\\-,.\\(\\)' _0-9A-Za-z\\/:\\x80-\\xff]";
	$np = "[&;%\\-,.' _0-9A-Za-z\\/:\\x80-\\xff]"; # No parens
	$namespacechar = '[ _0-9A-Za-z\x80-\xff]'; # Namespaces can use non-ascii!
	$conpat = "/^({$np}+) \\(({$tc}+)\\)$/";

	$p1 = "/\[\[({$np}+) \\(({$np}+)\\)\\|]]/"; # [[page (context)|]]
	$p2 = "/\[\[\\|({$tc}+)]]/"; # [[|page]]
	$p3 = "/\[\[(:*$namespacechar+):({$np}+)\\|]]/"; # [[namespace:page|]] and [[:namespace:page|]]
	$p4 = "/\[\[(:*$namespacechar+):({$np}+) \\(({$np}+)\\)\\|]]/"; # [[ns:page (cont)|]] and [[:ns:page (cont)|]]

	# The context is the parenthesised part of the current page title, if any
	$context = '';
	$t = $this->mTitle->getText();
	if ( preg_match( $conpat, $t, $m ) ) {
		$context = $m[2];
	}
	$text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text );
	$text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text );
	$text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text );

	if ( '' == $context ) {
		$text = preg_replace( $p2, '[[\\1]]', $text );
	} else {
		$text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text );
	}

	# Trim trailing whitespace
	# MAG_END (__END__) tag allows for trailing
	# whitespace to be deliberately included
	$text = rtrim( $text );
	$mw =& MagicWord::get( MAG_END );
	$mw->matchAndRemove( $text );

	return $text;
}
2707
/**
 * Prepare the parser for use from outside parse(): stores the title,
 * the options and the output type, and optionally resets the parser
 * state, so that an external function can call some class members
 * with confidence.
 *
 * @param Title &$title title to parse for (stored by reference)
 * @param ParserOptions $options parsing options
 * @param int $outputType one of the OT_* constants
 * @param bool $clearState whether to call clearState()
 * @access public
 */
function startExternalParse( &$title, $options, $outputType, $clearState = true ) {
	$this->mOutputType = $outputType;
	$this->mOptions = $options;
	$this->mTitle =& $title;
	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
2721
/**
 * Transform a MediaWiki message by replacing magic variables.
 *
 * The parser is set up for $wgTitle with output type OT_MSG and its
 * state is cleared before the replacement. A call made while another
 * call is still running returns the text unchanged.
 *
 * @param string $text the text to transform
 * @param ParserOptions $options options
 * @return string the text with variables substituted
 * @access public
 */
function transformMsg( $text, $options ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion: only the outermost call does work
	if ( !$executing ) {
		$executing = true;

		$this->mTitle = $wgTitle;
		$this->mOptions = $options;
		$this->mOutputType = OT_MSG;
		$this->clearState();
		$text = $this->replaceVariables( $text );

		$executing = false;
	}
	return $text;
}
2749
/**
 * Register a callback for an HTML-style tag, e.g. <yourtag>special text</yourtag>.
 * According to the original note, the callback is called with the text
 * within the tag and should transform and return it; the invocation
 * itself happens outside this method.
 *
 * @param string $tag tag name, used as key in mTagHooks
 * @param mixed $callback the callback to store
 * @return mixed the callback previously stored for the tag, or null
 * @access public
 */
function setHook( $tag, $callback ) {
	$previous = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	return $previous;
}
2761 }
2762
/**
 * Value holder for the result of a parse: the output text, the language
 * and category links collected along the way, a flag telling whether old
 * magic was encountered, and a cache timestamp.
 *
 * @package MediaWiki
 */
class ParserOutput
{
	var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic;
	var $mCacheTime; # Used in ParserCache

	/**
	 * @param string $text
	 * @param array $languageLinks
	 * @param array $categoryLinks
	 * @param bool $containsOldMagic
	 */
	function ParserOutput( $text = '', $languageLinks = array(), $categoryLinks = array(),
		$containsOldMagic = false )
	{
		$this->mText = $text;
		$this->mLanguageLinks = $languageLinks;
		$this->mCategoryLinks = $categoryLinks;
		$this->mContainsOldMagic = $containsOldMagic;
		$this->mCacheTime = '';
	}

	# Accessors
	function getText() { return $this->mText; }
	function getLanguageLinks() { return $this->mLanguageLinks; }
	function getCategoryLinks() { return $this->mCategoryLinks; }
	function getCacheTime() { return $this->mCacheTime; }
	function containsOldMagic() { return $this->mContainsOldMagic; }

	# Mutators; each one hands back whatever wfSetVar() returns
	function setText( $text ) { return wfSetVar( $this->mText, $text ); }
	function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); }
	function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); }
	function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
	function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); }

	/**
	 * Fold the links and the old-magic flag of another output into this
	 * one. The text and the cache time are not touched.
	 *
	 * @param ParserOutput $other
	 */
	function merge( $other ) {
		$this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks );
		# Categories come from $other as well; this used to append our own
		# language links to the category list instead.
		$this->mCategoryLinks = array_merge( $this->mCategoryLinks, $other->mCategoryLinks );
		$this->mContainsOldMagic = $this->mContainsOldMagic || $other->mContainsOldMagic;
	}

}
2800
/**
 * Set options of the Parser.
 *
 * Plain holder for the switches that influence parsing. An instance is
 * normally filled from a user's preferences and the site configuration
 * through newFromUser() / initialiseFromUser().
 *
 * @package MediaWiki
 */
class ParserOptions
{
	# All variables are private
	var $mUseTeX;                  # Use texvc to expand <math> tags
	var $mUseDynamicDates;         # Use $wgDateFormatter to format dates
	var $mInterwikiMagic;          # Interlanguage links are removed and returned in an array
	var $mAllowExternalImages;     # Allow external images inline
	var $mSkin;                    # Reference to the preferred skin
	var $mDateFormat;              # Date format index
	var $mEditSection;             # Create "edit section" links
	var $mEditSectionOnRightClick; # Generate JavaScript to edit section on right click
	var $mNumberHeadings;          # Automatically number headings
	var $mShowToc;                 # Show table of contents

	# Getters
	function getUseTeX() {
		return $this->mUseTeX;
	}
	function getUseDynamicDates() {
		return $this->mUseDynamicDates;
	}
	function getInterwikiMagic() {
		return $this->mInterwikiMagic;
	}
	function getAllowExternalImages() {
		return $this->mAllowExternalImages;
	}
	function getSkin() {
		return $this->mSkin;
	}
	function getDateFormat() {
		return $this->mDateFormat;
	}
	function getEditSection() {
		return $this->mEditSection;
	}
	function getEditSectionOnRightClick() {
		return $this->mEditSectionOnRightClick;
	}
	function getNumberHeadings() {
		return $this->mNumberHeadings;
	}
	function getShowToc() {
		return $this->mShowToc;
	}

	# Setters; each one hands back whatever wfSetVar() returns
	function setUseTeX( $x ) {
		return wfSetVar( $this->mUseTeX, $x );
	}
	function setUseDynamicDates( $x ) {
		return wfSetVar( $this->mUseDynamicDates, $x );
	}
	function setInterwikiMagic( $x ) {
		return wfSetVar( $this->mInterwikiMagic, $x );
	}
	function setAllowExternalImages( $x ) {
		return wfSetVar( $this->mAllowExternalImages, $x );
	}
	function setDateFormat( $x ) {
		return wfSetVar( $this->mDateFormat, $x );
	}
	function setEditSection( $x ) {
		return wfSetVar( $this->mEditSection, $x );
	}
	function setEditSectionOnRightClick( $x ) {
		return wfSetVar( $this->mEditSectionOnRightClick, $x );
	}
	function setNumberHeadings( $x ) {
		return wfSetVar( $this->mNumberHeadings, $x );
	}
	function setShowToc( $x ) {
		return wfSetVar( $this->mShowToc, $x );
	}

	# The skin is stored by reference and nothing is returned
	function setSkin( &$x ) {
		$this->mSkin =& $x;
	}

	/**
	 * Get parser options: build a new instance initialised from the
	 * given user. Meant to be called statically.
	 *
	 * @param User &$user
	 * @return ParserOptions
	 */
	/* static */ function newFromUser( &$user ) {
		$options = new ParserOptions;
		$options->initialiseFromUser( $user );
		return $options;
	}

	/**
	 * Get user options: fill this object from the site-wide globals and
	 * from the user's skin and preferences.
	 *
	 * @param User &$userInput the user; if it evaluates to false, a new
	 *        User marked as loaded is used instead
	 */
	function initialiseFromUser( &$userInput ) {
		global $wgUseTeX, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages;

		$fname = 'ParserOptions::initialiseFromUser';
		wfProfileIn( $fname );
		if ( $userInput ) {
			$user =& $userInput;
		} else {
			$user = new User;
			$user->setLoaded( true );
		}

		# Site-wide settings
		$this->mUseTeX = $wgUseTeX;
		$this->mUseDynamicDates = $wgUseDynamicDates;
		$this->mInterwikiMagic = $wgInterwikiMagic;
		$this->mAllowExternalImages = $wgAllowExternalImages;

		# Fetching the skin is profiled separately
		wfProfileIn( $fname.'-skin' );
		$this->mSkin =& $user->getSkin();
		wfProfileOut( $fname.'-skin' );

		# Per-user preferences
		$this->mDateFormat = $user->getOption( 'date' );
		$this->mEditSection = $user->getOption( 'editsection' );
		$this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' );
		$this->mNumberHeadings = $user->getOption( 'numberheadings' );
		$this->mShowToc = $user->getOption( 'showtoc' );
		wfProfileOut( $fname );
	}

}
2880
# Regex callbacks, used in Parser::replaceVariables

/**
 * Forward a regex match to braceSubstitution() of the parser stored in
 * the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever braceSubstitution() returns
 */
function wfBraceSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->braceSubstitution( $matches );
}
2886
/**
 * Forward a regex match to argSubstitution() of the parser stored in
 * the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever argSubstitution() returns
 */
function wfArgSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->argSubstitution( $matches );
}
2891
/**
 * Forward a regex match to variableSubstitution() of the parser stored
 * in the global $wgCurParser.
 *
 * @param array $matches match array handed over by the regex function
 * @return mixed whatever variableSubstitution() returns
 */
function wfVariableSubstitution( $matches ) {
	return $GLOBALS['wgCurParser']->variableSubstitution( $matches );
}
2896
/**
 * Return the total number of articles.
 *
 * Calls wfLoadSiteStats() first and then hands back the global
 * $wgNumberOfArticles.
 *
 * @return mixed current value of $wgNumberOfArticles
 */
function wfNumberOfArticles() {
	wfLoadSiteStats();
	return $GLOBALS['wgNumberOfArticles'];
}
2906
/**
 * Get various statistics from the database.
 *
 * Fills the globals $wgTotalViews, $wgTotalEdits and $wgNumberOfArticles
 * from row 1 of the site_stats table. Nothing is done when
 * $wgNumberOfArticles already differs from -1, and the globals are left
 * alone when the row cannot be fetched.
 *
 * @private
 */
function wfLoadSiteStats() {
	global $wgNumberOfArticles, $wgTotalViews, $wgTotalEdits;
	$fname = 'wfLoadSiteStats';

	# -1 is the "not loaded yet" marker
	if ( -1 == $wgNumberOfArticles ) {
		$dbr =& wfGetDB( DB_SLAVE );
		$fields = array( 'ss_total_views', 'ss_total_edits', 'ss_good_articles' );
		$row = $dbr->getArray( 'site_stats', $fields, array( 'ss_row_id' => 1 ), $fname );

		if ( $row !== false ) {
			$wgTotalViews = $row->ss_total_views;
			$wgTotalEdits = $row->ss_total_edits;
			$wgNumberOfArticles = $row->ss_good_articles;
		}
	}
}
2930
/**
 * Escape only the characters that can open or close a tag or an
 * attribute value: double quote, '>' and '<'. Ampersands and everything
 * else are passed through untouched.
 *
 * @param string $in
 * @return string
 */
function wfEscapeHTMLTagsOnly( $in ) {
	$map = array(
		'"' => '&quot;',
		'>' => '&gt;',
		'<' => '&lt;',
	);
	return strtr( $in, $map );
}
2937
2938 ?>