<?php
-include_once('Tokenizer.php');
+require_once('Tokenizer.php');
if( $GLOBALS['wgUseWikiHiero'] ){
- include_once('wikihiero.php');
+ require_once('extensions/wikihiero/wikihiero.php');
}
if( $GLOBALS['wgUseTimeline'] ){
- include_once('extensions/timeline/Timeline.php');
+ require_once('extensions/timeline/Timeline.php');
}
# PHP Parser
# Pull the <math> sections out of $text. Judging by the loop below,
# $math_content ends up keyed by placeholder marker with the tag body as value
# (extractTags itself is not visible here -- confirm against its definition).
$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
foreach( $math_content as $marker => $content ){
- if( $render && $this->mOptions->getUseTeX() ){
- $math_content[$marker] = renderMath( $content );
+ if( $render ) {
+ if( $this->mOptions->getUseTeX() ) {
+ $math_content[$marker] = renderMath( $content );
+ } else {
+ # Rendering with TeX disabled: show the tag as literal text. The markup is
+ # entity-escaped so it is displayed rather than parsed, and the closing
+ # tag carries its slash (it used to be emitted as a second opening tag).
+ $math_content[$marker] = "&lt;math&gt;$content&lt;/math&gt;";
+ }
} else {
# Not rendering: put the original wikitext tag back unchanged.
$math_content[$marker] = "<math>$content</math>";
}
/**
 * Interface with HTML Tidy; used if $wgUseTidy = true.
 *
 * Chooses a character-encoding flag from $wgInputEncoding / $wgOutputEncoding
 * (' -latin1' or ' -utf8' when both encodings are equal and the output
 * encoding is ISO-8859-1 or UTF-8 respectively, ' -raw' in every other case),
 * wraps $text in a minimal XHTML 1.0 Transitional document and hands it to an
 * external process through pipes. The lines that start the process and read
 * its output are not visible in this hunk.
 *
 * NOTE(review): the flag is appended to the *global* $wgTidyOpts with `.=`, so
 * it appears to accumulate if tidy() runs more than once per request --
 * confirm, and consider building the option string in a local variable.
 * NOTE(review): $text has already been wrapped in the XHTML skeleton when the
 * final check runs, so `$text != ''` looks always true there and the fallback
 * <pre> dump includes the wrapper markup -- confirm this is intended.
 *
 * @param string $text markup that is placed inside the <body> of the wrapper
 * @return string $cleansource (presumably the process output), or, when that
 *   is empty, an <h2> with the 'seriousxhtmlerrors' message followed by the
 *   HTML-escaped input in a <pre> block
 */
function tidy ( $text ) {
global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
+ global $wgInputEncoding, $wgOutputEncoding;
$cleansource = '';
+ switch(strtoupper($wgOutputEncoding)) {
+ case 'ISO-8859-1':
+ $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw';
+ break;
+ case 'UTF-8':
+ $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw';
+ break;
+ default:
+ $wgTidyOpts .= ' -raw';
+ }
+
+ $text = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
+' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'.
+'<head><title>test</title></head><body>'.$text.'</body></html>';
$descriptorspec = array(
0 => array("pipe", "r"),
1 => array("pipe", "w"),
fclose($pipes[1]);
$return_value = proc_close($process);
}
- return preg_replace("/(^.*<body[^>]*>|<\\/body[^>]*>.*$)/s", '', $cleansource);
-
+ if( $cleansource == '' && $text != '') {
+ return '<h2>'.wfMsg('seriousxhtmlerrors').'</h2><pre>'.htmlspecialchars($text).'</pre>';
+ } else {
+ return $cleansource;
+ }
}
function doTableStuff ( $t )
# Cleans up HTML, removes dangerous tags and attributes.
#
# Splits $text on "<" and examines each piece. Tags on the whitelist are
# re-emitted with their attributes passed through fixTagAttributes(); every
# other piece has its angle brackets entity-escaped so that it is shown as
# literal text instead of being interpreted by the browser.
# The whitelist is empty unless $wgUserHtml is set.
# When $wgUseTidy is off, open tags are tracked on a stack so that stray or
# mis-nested tags are escaped and tags left open are closed at the end.
# When $wgUseTidy is on, nesting is not checked here.
#
# @param string $text text to sanitize
# @return string the sanitized text
/* private */ function removeHTMLtags( $text )
{
+ global $wgUseTidy, $wgUserHtml;
$fname = "Parser::removeHTMLtags";
wfProfileIn( $fname );
- $htmlpairs = array( # Tags that must be closed
- "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
- "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
- "strike", "strong", "tt", "var", "div", "center",
- "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
- "ruby", "rt" , "rb" , "rp", "p"
- );
- $htmlsingle = array(
- "br", "hr", "li", "dt", "dd"
- );
- $htmlnest = array( # Tags that can be nested--??
- "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
- "dl", "font", "big", "small", "sub", "sup"
- );
- $tabletags = array( # Can only appear inside table
- "td", "th", "tr"
- );
+
+ if( $wgUserHtml ) {
+ $htmlpairs = array( # Tags that must be closed
+ "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1",
+ "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s",
+ "strike", "strong", "tt", "var", "div", "center",
+ "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
+ "ruby", "rt" , "rb" , "rp", "p"
+ );
+ $htmlsingle = array(
+ "br", "hr", "li", "dt", "dd"
+ );
+ $htmlnest = array( # Tags that can be nested--??
+ "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
+ "dl", "font", "big", "small", "sub", "sup"
+ );
+ $tabletags = array( # Can only appear inside table
+ "td", "th", "tr"
+ );
+ } else {
+ $htmlpairs = array();
+ $htmlsingle = array();
+ $htmlnest = array();
+ $tabletags = array();
+ }
$htmlsingle = array_merge( $tabletags, $htmlsingle );
$htmlelements = array_merge( $htmlsingle, $htmlpairs );
$bits = explode( "<", $text );
$text = array_shift( $bits );
- $tagstack = array(); $tablestack = array();
-
- foreach ( $bits as $x ) {
- $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
- preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
- $x, $regs );
- list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
- error_reporting( $prev );
-
- $badtag = 0 ;
- if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
- # Check our stack
- if ( $slash ) {
- # Closing a tag...
- if ( ! in_array( $t, $htmlsingle ) &&
- ( $ot = array_pop( $tagstack ) ) != $t ) {
- array_push( $tagstack, $ot );
- $badtag = 1;
+ if(!$wgUseTidy) {
+ $tagstack = array(); $tablestack = array();
+ foreach ( $bits as $x ) {
+ $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) );
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ error_reporting( $prev );
+
+ $badtag = 0 ;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ # Check our stack
+ if ( $slash ) {
+ # Closing a tag...
+ # Pop the innermost open tag (if there is one) and require it to match.
+ # Comparing the result of `count() && $ot = array_pop()` with $t compared
+ # a boolean, which accepted any closing tag while the stack was non-empty
+ # and could re-push a stale $ot left over from an earlier iteration.
+ $ot = null;
+ if ( ! in_array( $t, $htmlsingle ) && count( $tagstack ) ) {
+ $ot = array_pop( $tagstack );
+ }
+ if ( ! in_array( $t, $htmlsingle ) && $ot !== $t ) {
+ # Mismatch: restore the stack and treat the tag as literal text
+ if ( $ot !== null ) array_push( $tagstack, $ot );
+ $badtag = 1;
+ } else {
+ if ( $t == "table" ) {
+ $tagstack = array_pop( $tablestack );
+ }
+ $newparams = "";
+ }
} else {
- if ( $t == "table" ) {
- $tagstack = array_pop( $tablestack );
+ # Keep track for later
+ if ( in_array( $t, $tabletags ) &&
+ ! in_array( "table", $tagstack ) ) {
+ $badtag = 1;
+ } else if ( in_array( $t, $tagstack ) &&
+ ! in_array ( $t , $htmlnest ) ) {
+ $badtag = 1 ;
+ } else if ( ! in_array( $t, $htmlsingle ) ) {
+ if ( $t == "table" ) {
+ array_push( $tablestack, $tagstack );
+ $tagstack = array();
+ }
+ array_push( $tagstack, $t );
}
- $newparams = "";
+ # Strip non-approved attributes from the tag
+ $newparams = $this->fixTagAttributes($params);
+
}
- } else {
- # Keep track for later
- if ( in_array( $t, $tabletags ) &&
- ! in_array( "table", $tagstack ) ) {
- $badtag = 1;
- } else if ( in_array( $t, $tagstack ) &&
- ! in_array ( $t , $htmlnest ) ) {
- $badtag = 1 ;
- } else if ( ! in_array( $t, $htmlsingle ) ) {
- if ( $t == "table" ) {
- array_push( $tablestack, $tagstack );
- $tagstack = array();
- }
- array_push( $tagstack, $t );
+ if ( ! $badtag ) {
+ # Accepted tag: only the trailing text needs its ">" escaped
+ $rest = str_replace( ">", "&gt;", $rest );
+ $text .= "<$slash$t $newparams$brace$rest";
+ continue;
}
- # Strip non-approved attributes from the tag
- $newparams = $this->fixTagAttributes($params);
-
}
- if ( ! $badtag ) {
+ # Rejected or unknown tag: escape both brackets so it is shown as text
+ $text .= "&lt;" . str_replace( ">", "&gt;", $x);
+ }
+ # Close off any remaining tags
+ while ( $t = array_pop( $tagstack ) ) {
+ $text .= "</$t>\n";
+ if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
+ } else {
+ # this might be possible using tidy itself
+ foreach ( $bits as $x ) {
+ preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/",
+ $x, $regs );
+ @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs;
+ if ( in_array( $t = strtolower( $t ), $htmlelements ) ) {
+ $newparams = $this->fixTagAttributes($params);
$rest = str_replace( ">", "&gt;", $rest );
$text .= "<$slash$t $newparams$brace$rest";
- continue;
+ } else {
+ $text .= "&lt;" . str_replace( ">", "&gt;", $x);
}
- }
- $text .= "&lt;" . str_replace( ">", "&gt;", $x);
- }
- # Close off any remaining tags
- while ( $t = array_pop( $tagstack ) ) {
- $text .= "</$t>\n";
- if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); }
+ }
}
wfProfileOut( $fname );
return $text;
}
+
/*
*
* This function accomplishes several tasks:
/* private */ function formatHeadings( $text )
{
+ global $wgInputEncoding;
+
$doNumberHeadings = $this->mOptions->getNumberHeadings();
$doShowToc = $this->mOptions->getShowToc();
if( !$this->mTitle->userCanEdit() ) {
# strip out HTML
$canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline );
$tocline = trim( $canonized_headline );
- $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline));
+ $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) );
$refer[$headlineCount] = $canonized_headline;
# count how many in assoc. array so we can track dupes in anchors