X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FParser.php;h=79aa4e724473f7b9b410f0cee0ec687c00b9eaab;hb=73ec400d1afc143f1b0fa50cb83c39281158b30c;hp=da0bc0ea663fe086e884c83ba459225fe90eca1e;hpb=f2e949d974335b676b615d418b8229351e328023;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index da0bc0ea66..79aa4e7244 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1,25 +1,28 @@ -style tags. This should not be anything we +# may want to use in wikisyntax +define( "STRIP_COMMENTS", "HTMLCommentStrip" ); + # prefix for escaping, used in two functions at least define( "UNIQ_PREFIX", "NaodW29"); class Parser { # Cleared with clearState(): - var $mOutput, $mAutonumber, $mLastSection, $mDTopen, $mStripState = array(); - var $mVariables, $mIncludeCount; + var $mOutput, $mAutonumber, $mDTopen, $mStripState = array(); + var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre; # Temporary: var $mOptions, $mTitle, $mOutputType; @@ -69,31 +76,56 @@ class Parser $this->mVariables = false; $this->mIncludeCount = array(); $this->mStripState = array(); + $this->mArgStack = array(); } - + # First pass--just handle sections, pass the rest off - # to doWikiPass2() which does all the real work. + # to internalParse() which does all the real work. # # Returns a ParserOutput # function parse( $text, &$title, $options, $linestart = true, $clearState = true ) { + global $wgUseTidy; $fname = "Parser::parse"; wfProfileIn( $fname ); if ( $clearState ) { $this->clearState(); } - + $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_HTML; - + $stripState = NULL; $text = $this->strip( $text, $this->mStripState ); - $text = $this->doWikiPass2( $text, $linestart ); + $text = $this->internalParse( $text, $linestart ); $text = $this->unstrip( $text, $this->mStripState ); - + # Clean up special characters, only run once, next-to-last before doBlockLevels + if(!$wgUseTidy) { + $fixtags = array( + "/
/i" => '
', + "/
/i" => '
', + "/
/i"=>'
', + "/<\\/center *>/i" => '
', + # Clean up spare ampersands; note that we probably ought to be + # more careful about named entities. + '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + } else { + $fixtags = array( + "/
/i"=>'
', + "/<\\/center *>/i" => '
' + ); + $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); + } + # only once and last + $text = $this->doBlockLevels( $text, $linestart ); + if($wgUseTidy) { + $text = $this->tidy($text); + } $this->mOutput->setText( $text ); wfProfileOut( $fname ); return $this->mOutput; @@ -104,13 +136,16 @@ class Parser return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); } - # Replaces all occurences of <$tag>content in the text + # Replaces all occurrences of <$tag>content in the text # with a random marker and returns the new text. the output parameter # $content will be an associative array filled with data on the form # $unique_marker => content. # If $content is already set, the additional entries will be appended + # If $tag is set to STRIP_COMMENTS, the function will extract + # + /* static */ function extractTags($tag, $text, &$content, $uniq_prefix = ""){ $rnd = $uniq_prefix . '-' . $tag . Parser::getRandomString(); if ( !$content ) { @@ -120,12 +155,20 @@ class Parser $stripped = ""; while ( "" != $text ) { - $p = preg_split( "/<\\s*$tag\\s*>/i", $text, 2 ); + if($tag==STRIP_COMMENTS) { + $p = preg_split( "//i", $p[1], 2 ); + } else { + $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 ); + } $marker = $rnd . sprintf("%08X", $n++); $content[$marker] = $q[0]; $stripped .= $marker; @@ -133,32 +176,29 @@ class Parser } } return $stripped; - } + } - # Strips ,
 and 
+	# Strips and renders <nowiki>, <pre>, <math>, <hiero>
+	# If $render is set, performs necessary rendering operations on plugins
 	# Returns the text, and fills an array with data needed in unstrip()
 	# If the $state is already a valid strip state, it adds to the state
-	#
-	function strip( $text, &$state )
+
+	# When $stripcomments is set, HTML comments <!-- like this -->
+	# will be stripped in addition to other tags. This is important
+	# for section editing, where these comments cause confusion when
+	# counting the sections in the wikisource
+	function strip( $text, &$state, $stripcomments = false )
 	{
 		$render = ($this->mOutputType == OT_HTML);
-		if ( $state ) {
-			$nowiki_content = $state['nowiki']; 
-			$hiero_content = $state['hiero'];
-			$math_content = $state['math'];
-			$pre_content = $state['pre'];
-			$item_content = $state['item'];
-		} else {
-			$nowiki_content = array(); 
-			$hiero_content = array();
-			$math_content = array();
-			$pre_content = array();
-			$item_content = array();
-		}
+		$nowiki_content = array();
+		$hiero_content = array();
+		$math_content = array();
+		$pre_content = array();
+		$comment_content = array();
 
 		# Replace any instances of the placeholders
 		$uniq_prefix = UNIQ_PREFIX;
-		$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
+		#$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text );
 
 		$text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix);
 		foreach( $nowiki_content as $marker => $content ){
@@ -169,25 +209,25 @@ class Parser
 			}
 		}
 
-		if( $GLOBALS['wgUseWikiHiero'] ){
-			$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
-			foreach( $hiero_content as $marker => $content ){
-				if( $render ){
-					$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
-				} else {
-					$hiero_content[$marker] = "$content";
-				}
+		$text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix);
+		foreach( $hiero_content as $marker => $content ){
+			if( $render && $GLOBALS['wgUseWikiHiero']){
+				$hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML);
+			} else {
+				$hiero_content[$marker] = "$content";
 			}
 		}
 
-		if( $this->mOptions->getUseTeX() ){
-			$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
-			foreach( $math_content as $marker => $content ){
-				if( $render ){
+		$text = Parser::extractTags("math", $text, $math_content, $uniq_prefix);
+		foreach( $math_content as $marker => $content ){
+			if( $render ) {
+				if( $this->mOptions->getUseTeX() ) {
 					$math_content[$marker] = renderMath( $content );
 				} else {
-					$math_content[$marker] = "$content";
+					$math_content[$marker] = "<math>$content<math>";
 				}
+			} else {
+				$math_content[$marker] = "$content";
 			}
 		}
 
@@ -199,31 +239,37 @@ class Parser
 				$pre_content[$marker] = "
$content
"; } } - - $state = array( - 'nowiki' => $nowiki_content, - 'hiero' => $hiero_content, - 'math' => $math_content, - 'pre' => $pre_content, - 'item' => $item_content - ); + if($stripcomments) { + $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix); + foreach( $comment_content as $marker => $content ){ + $comment_content[$marker] = ""; + } + } + + # Merge state with the pre-existing state, if there is one + if ( $state ) { + $state['nowiki'] = $state['nowiki'] + $nowiki_content; + $state['hiero'] = $state['hiero'] + $hiero_content; + $state['math'] = $state['math'] + $math_content; + $state['pre'] = $state['pre'] + $pre_content; + $state['comment'] = $state['comment'] + $comment_content; + } else { + $state = array( + 'nowiki' => $nowiki_content, + 'hiero' => $hiero_content, + 'math' => $math_content, + 'pre' => $pre_content, + 'comment' => $comment_content + ); + } return $text; } function unstrip( $text, &$state ) { # Must expand in reverse order, otherwise nested tags will be corrupted - /* - $dicts = array( 'item', 'pre', 'math', 'hiero', 'nowiki' ); - foreach ( $dicts as $dictName ) { - $content_dict = $state[$dictName]; - foreach( $content_dict as $marker => $content ){ - $text = str_replace( $marker, $content, $text ); - } - }*/ - $contentDict = end( $state ); - for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { + for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { $text = str_replace( key( $contentDict ), $content, $text ); } @@ -231,7 +277,7 @@ class Parser return $text; } - + # Add an item to the strip state # Returns the unique tag which must be inserted into the stripped text # The tag will be replaced with the original text in unstrip() @@ -240,64 +286,65 @@ class Parser { $rnd = UNIQ_PREFIX . '-item' . 
Parser::getRandomString(); if ( !$state ) { - $state = array( + $state = array( 'nowiki' => array(), 'hiero' => array(), 'math' => array(), - 'pre' => array(), - 'item' => array() + 'pre' => array() ); } $state['item'][$rnd] = $text; return $rnd; } - + + # This method generates the list of subcategories and pages for a category function categoryMagic () { global $wgLang , $wgUser ; - if ( !$this->mOptions->getUseCategoryMagic() ) return ; - $id = $this->mTitle->getArticleID() ; - $cat = $wgLang->ucfirst ( wfMsg ( "category" ) ) ; - $ti = $this->mTitle->getText() ; - $ti = explode ( ":" , $ti , 2 ) ; - if ( $cat != $ti[0] ) return "" ; - $r = "
\n" ; + if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all - $articles = array() ; - $parents = array () ; - $children = array() ; + $cns = Namespace::getCategory() ; + if ( $this->mTitle->getNamespace() != $cns ) return "" ; # This ain't a category page + + $r = "
\n"; -# $sk =& $this->mGetSkin(); $sk =& $wgUser->getSkin() ; + $articles = array() ; + $children = array() ; $data = array () ; - $sql1 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id"; - $sql2 = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to={$id} AND bl_from=cur_id" ; - - $res = wfQuery ( $sql1, DB_READ ) ; - while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; - - $res = wfQuery ( $sql2, DB_READ ) ; - while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; + $id = $this->mTitle->getArticleID() ; + # For existing categories + if( $id ) { + $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,links WHERE l_to={$id} AND l_from=cur_id"; + $res = wfQuery ( $sql, DB_READ ) ; + while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; + } else { + # For non-existing categories + $t = wfStrencode( $this->mTitle->getPrefixedDBKey() ); + $sql = "SELECT DISTINCT cur_title,cur_namespace FROM cur,brokenlinks WHERE bl_to='$t' AND bl_from=cur_id" ; + $res = wfQuery ( $sql, DB_READ ) ; + while ( $x = wfFetchObject ( $res ) ) $data[] = $x ; + } + # For all pages that link to this category foreach ( $data AS $x ) { $t = $wgLang->getNsText ( $x->cur_namespace ) ; if ( $t != "" ) $t .= ":" ; $t .= $x->cur_title ; - $y = explode ( ":" , $t , 2 ) ; - if ( count ( $y ) == 2 && $y[0] == $cat ) { - array_push ( $children , $sk->makeLink ( $t , $y[1] ) ) ; + if ( $x->cur_namespace == $cns ) { + array_push ( $children , $sk->makeLink ( $t ) ) ; # Subcategory } else { - array_push ( $articles , $sk->makeLink ( $t ) ) ; + array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category } } wfFreeResult ( $res ) ; - # Children + # Showing subcategories if ( count ( $children ) > 0 ) { asort ( $children ) ; @@ -305,11 +352,12 @@ class Parser $r .= implode ( ", " , $children ) ; } - # Articles + # Showing pages in this category if ( count ( $articles ) > 0 ) { + $ti = $this->mTitle->getText() ; asort ( $articles ) ; 
- $h = wfMsg( "category_header", $ti[1] ); + $h = wfMsg( "category_header", $ti ); $r .= "

{$h}

\n" ; $r .= implode ( ", " , $articles ) ; } @@ -339,7 +387,7 @@ class Parser { if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-) $htmlattrs = $this->getHTMLattrs() ; - + # Strip non-approved attributes from the tag $t = preg_replace( "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e", @@ -348,7 +396,7 @@ class Parser # Strip javascript "expression" from stylesheets. Brute force approach: # If anythin offensive is found, all attributes of the HTML tag are dropped - if( preg_match( + if( preg_match( "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is", wfMungeToUtf8( $t ) ) ) { @@ -358,6 +406,47 @@ class Parser return trim ( $t ) ; } + /* interface with html tidy, used if $wgUseTidy = true */ + function tidy ( $text ) { + global $wgTidyConf, $wgTidyBin, $wgTidyOpts; + global $wgInputEncoding, $wgOutputEncoding; + $cleansource = ''; + switch(strtoupper($wgOutputEncoding)) { + case 'ISO-8859-1': + $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -latin1':' -raw'; + break; + case 'UTF-8': + $wgTidyOpts .= ($wgInputEncoding == $wgOutputEncoding)? ' -utf8':' -raw'; + break; + default: + $wgTidyOpts .= ' -raw'; + } + + $text = ''. +'test'.$text.''; + $descriptorspec = array( + 0 => array("pipe", "r"), + 1 => array("pipe", "w"), + 2 => array("file", "/dev/null", "a") + ); + $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes); + if (is_resource($process)) { + fwrite($pipes[0], $text); + fclose($pipes[0]); + while (!feof($pipes[1])) { + $cleansource .= fgets($pipes[1], 1024); + } + fclose($pipes[1]); + $return_value = proc_close($process); + } + if( $cleansource == '' && $text != '') { + return '

'.wfMsg('seriousxhtmlerrors').'

'.htmlspecialchars($text).'
'; + } else { + return $cleansource; + } + } + function doTableStuff ( $t ) { $t = explode ( "\n" , $t ) ; @@ -367,7 +456,7 @@ class Parser $ltr = array () ; # tr attributes foreach ( $t AS $k => $x ) { - $x = rtrim ( $x ) ; + $x = trim ( $x ) ; $fc = substr ( $x , 0 , 1 ) ; if ( "{|" == substr ( $x , 0 , 2 ) ) { @@ -388,7 +477,7 @@ class Parser $t[$k] = $z ; } /* else if ( "|_" == substr ( $x , 0 , 2 ) ) # Caption - { + { $z = trim ( substr ( $x , 2 ) ) ; $t[$k] = "{$z}\n" ; }*/ @@ -422,7 +511,7 @@ class Parser { $z = "" ; if ( $fc != "+" ) - { + { $tra = array_pop ( $ltr ) ; if ( !array_pop ( $tr ) ) $z = "\n" ; array_push ( $tr , true ) ; @@ -458,54 +547,32 @@ class Parser return $t ; } - # Well, OK, it's actually about 14 passes. But since all the - # hard lifting is done inside PHP's regex code, it probably - # wouldn't speed things up much to add a real parser. - # - function doWikiPass2( $text, $linestart ) + function internalParse( $text, $linestart, $args = array() ) { - $fname = "Parser::doWikiPass2"; + $fname = "Parser::internalParse"; wfProfileIn( $fname ); - + $text = $this->removeHTMLtags( $text ); - $text = $this->replaceVariables( $text ); + $text = $this->replaceVariables( $text, $args ); # $text = preg_replace( "/(^|\n)-----*/", "\\1
", $text ); $text = $this->doHeadings( $text ); - if($this->mOptions->getUseDynamicDates()) { global $wgDateFormatter; $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text ); } - $text = $this->replaceExternalLinks( $text ); $text = $this->doTokenizedParser ( $text ); - $text = $this->doTableStuff ( $text ) ; - $text = $this->formatHeadings( $text ); - $sk =& $this->mOptions->getSkin(); $text = $sk->transformContent( $text ); - $fixtags = array( - "/
/i" => '
', - "/
/i" => '
', - "/
/i"=>'', - "/<\\/center *>/i" => '' - ); - $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); - // another round, but without regex - $fixtags = array( - '& ' => '&', - '&<' => '&<', - ); - $text = str_replace( array_keys($fixtags), array_values($fixtags), $text ); - $text .= $this->categoryMagic () ; - - # needs to be called last - $text = $this->doBlockLevels( $text, $linestart ); + if ( !isset ( $this->categoryMagicDone ) ) { + $text .= $this->categoryMagic () ; + $this->categoryMagicDone = true ; + } wfProfileOut( $fname ); return $text; @@ -540,18 +607,18 @@ class Parser wfProfileOut( $fname ); return $text; } - + /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) { $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3"; $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF"; - - # this is the list of separators that should be ignored if they + + # this is the list of separators that should be ignored if they # are the last character of an URL but that should be included # if they occur within the URL, e.g. "go to www.foo.com, where .." # in this case, the last comma should not become part of the URL, # but in "www.foo.com/123,2342,32.htm" it should. - $sep = ",;\.:"; + $sep = ",;\.:"; $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF"; $images = "gif|png|jpg|jpeg"; @@ -560,7 +627,7 @@ class Parser # that the content of the string should be inserted there). $e1 = "/(^|[^\\[])({$protocol}:)([{$uc}{$sep}]+)\\/([{$fnc}]+)\\." . "((?i){$images})([^{$uc}]|$)/"; - + $e2 = "/(^|[^\\[])({$protocol}:)(([".$uc."]|[".$sep."][".$uc."])+)([^". $uc . $sep. "]|[".$sep."]|$)/"; $sk =& $this->mOptions->getSkin(); @@ -590,7 +657,7 @@ class Parser } else if ( preg_match( $e2, $line, $m ) ) { $link = "{$protocol}:{$m[1]}"; $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { $s .= "[{$protocol}:" . 
$line; continue; @@ -621,7 +688,7 @@ class Parser $state["strong"] = FALSE; } else { $s = ""; - $state["strong"] = $token["pos"]; + $state["strong"] = isset($token["pos"]) ? $token["pos"] : true; } return $s; } @@ -639,15 +706,16 @@ class Parser $state["em"] = FALSE; } else { $s = ""; - $state["em"] = $token["pos"]; + $state["em"] = isset($token["pos"]) ? $token["pos"] : true; + } return $s; } - + /* private */ function handle5Quotes( &$state, $token ) { $s = ""; - if ( $state["em"] !== false && $state["strong"] ) { + if ( $state["em"] !== false && $state["strong"] !== false ) { if ( $state["em"] < $state["strong"] ) { $s .= ""; } else { @@ -664,7 +732,7 @@ class Parser $state["em"] = $token["pos"]; } else { # not $em and not $strong $s .= ""; - $state["strong"] = $state["em"] = $token["pos"]; + $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true; } return $s; } @@ -672,16 +740,17 @@ class Parser /* private */ function doTokenizedParser( $str ) { global $wgLang; # for language specific parser hook + global $wgUploadDirectory, $wgUseTimeline; $tokenizer=Tokenizer::newFromString( $str ); $tokenStack = array(); - + $s=""; $state["em"] = FALSE; $state["strong"] = FALSE; $tagIsOpen = FALSE; $threeopen = false; - + # The tokenizer splits the text into tokens and returns them one by one. # Every call to the tokenizer returns a new token. while ( $token = $tokenizer->nextToken() ) @@ -692,6 +761,13 @@ class Parser # simple text with no further markup $txt = $token["text"]; break; + case "blank": + # Text that contains blanks that have to be converted to + # non-breakable spaces for French. + # U+202F NARROW NO-BREAK SPACE might be a better choice, but + # browser support for Unicode spacing is poor. 
+ $txt = str_replace( " ", " ", $token["text"] ); + break; case "[[[": # remember the tag opened with 3 [ $threeopen = true; @@ -702,13 +778,13 @@ class Parser array_push( $tokenStack, $token ); $txt=""; break; - + case "]]]": case "]]": # link close tag. # get text from stack, glue it together, and call the code to handle a # link - + if ( count( $tokenStack ) == 0 ) { # stack empty. Found a ]] without an opening [[ @@ -723,31 +799,30 @@ class Parser } $lastToken = array_pop( $tokenStack ); } - + $txt = $linkText ."]]"; - + if( isset( $lastToken["text"] ) ) { $prefix = $lastToken["text"]; } else { $prefix = ""; } $nextToken = $tokenizer->previewToken(); - if ( $nextToken["type"] == "text" ) + if ( $nextToken["type"] == "text" ) { # Preview just looks at it. Now we have to fetch it. $nextToken = $tokenizer->nextToken(); $txt .= $nextToken["text"]; } - $fakestate = $this->mStripState; - $txt = $this->handleInternalLink( $this->unstrip($txt,$fakestate), $prefix ); + $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix ); - # did the tag start with 3 [ ? + # did the tag start with 3 [ ? if($threeopen) { # show the first as text $txt = "[".$txt; $threeopen=false; } - + } $tagIsOpen = (count( $tokenStack ) != 0); break; @@ -782,6 +857,15 @@ class Parser $txt = $this->doMagicISBN( $tokenizer ); } break; + case "": + if ( $wgUseTimeline && + "" != ( $timelinesrc = $tokenizer->readAllUntil("</timeline>") ) ) + { + $txt = renderTimeline( $timelinesrc ); + } else { + $txt=$token["text"]; + } + break; default: # Call language specific Hook. $txt = $wgLang->processToken( $token, $tokenStack ); @@ -813,7 +897,7 @@ class Parser $txt = $lastToken["text"] . $txt; } else { $txt = $lastToken["type"] . 
$txt; - } + } } $s .= $txt; } @@ -840,7 +924,7 @@ class Parser #$e2 = "/^(.*)\\b(\\w+)\$/suD"; #$e2 = "/^(.*\\s)(\\S+)\$/suD"; static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD'; - + # Special and Media are pseudo-namespaces; no pages actually exist in them static $image = FALSE; @@ -850,21 +934,21 @@ class Parser if ( !$image ) { $image = Namespace::getImage(); } if ( !$special ) { $special = Namespace::getSpecial(); } if ( !$media ) { $media = Namespace::getMedia(); } - if ( !$category ) { $category = wfMsg ( "category" ) ; } - + if ( !$category ) { $category = Namespace::getCategory(); } + $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() ); wfProfileOut( "$fname-setup" ); $s = ""; - + if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; - $trail = $m[3]; + $trail = $m[3]; } else { # Invalid form; output directly $s .= $prefix . "[[" . $line ; return $s; } - + /* Valid link forms: Foobar -- normal :Foobar -- override special treatment of prefix (images, language links) @@ -875,7 +959,7 @@ class Parser $noforce = ($c != ":"); if( $c == "/" ) { # subpage if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown - $m[1]=substr($m[1],1,strlen($m[1])-2); + $m[1]=substr($m[1],1,strlen($m[1])-2); $noslash=$m[1]; } else { $noslash=substr($m[1],1); @@ -883,7 +967,7 @@ class Parser if($wgNamespacesWithSubpages[$this->mTitle->getNamespace()]) { # subpages allowed here $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash); if( "" == $text ) { - $text= $m[1]; + $text= $m[1]; } # this might be changed for ugliness reasons } else { $link = $noslash; # no subpage allowed, use standard link @@ -906,6 +990,7 @@ class Parser if( $noforce ) { if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) { array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() ); + $s .= $prefix . $trail ; return (trim($s) == '')? 
'': $s; } if( $ns == $image ) { @@ -913,30 +998,22 @@ class Parser $wgLinkCache->addImageLinkObj( $nt ); return $s; } + if ( $ns == $category ) { + $t = $nt->getText() ; + $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ; + $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix ); + $this->mOutput->mCategoryLinks[] = $t ; + $s .= $prefix . $trail ; + return $s ; + } } if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) && ( strpos( $link, "#" ) == FALSE ) ) { - $s .= $prefix . "" . $text . "" . $trail; + # Self-links are handled specially; generally de-link and change to bold. + $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail ); return $s; } - # Category feature - $catns = strtoupper ( $nt->getDBkey () ) ; - $catns = explode ( ":" , $catns ) ; - if ( count ( $catns ) > 1 ) $catns = array_shift ( $catns ) ; - else $catns = "" ; - if ( $catns == strtoupper($category) && $this->mOptions->getUseCategoryMagic() ) { - $t = explode ( ":" , $nt->getText() ) ; - array_shift ( $t ) ; - $t = implode ( ":" , $t ) ; - $t = $wgLang->ucFirst ( $t ) ; - $nnt = Title::newFromText ( $category.":".$t ) ; - $t = $sk->makeLinkObj( $nnt, $t, "", $trail , $prefix ); - $this->mOutput->mCategoryLinks[] = $t ; - $s .= $prefix . $trail ; - return $s ; - } - if( $ns == $media ) { $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail; $wgLinkCache->addImageLinkObj( $nt ); @@ -959,6 +1036,7 @@ class Parser if ( '' != $this->mLastSection ) { $result = "mLastSection . ">\n"; } + $this->mInPre = false; $this->mLastSection = ""; return $result; } @@ -1028,75 +1106,106 @@ class Parser return $text."\n"; } - /* private */ function doBlockLevels( $text, $linestart ) - { + /* private */ function doBlockLevels( $text, $linestart ) { $fname = "Parser::doBlockLevels"; wfProfileIn( $fname ); + # Parsing through the text line by line. 
The main thing # happening here is handling of block-level elements p, pre, # and making lists from lines starting with * # : etc. # - $a = explode( "\n", $text ); - $lastPref = $text = $lastLine = ''; - $this->mDTopen = $inBlockElem = false; - - if ( ! $linestart ) { $text .= array_shift( $a ); } - foreach ( $a as $t ) { - if ( "" != $text ) { $text .= "\n"; } + $textLines = explode( "\n", $text ); - $oLine = $t; - $opl = strlen( $lastPref ); - $npl = strspn( $t, "*#:;" ); - $pref = substr( $t, 0, $npl ); - $pref2 = str_replace( ";", ":", $pref ); - $t = substr( $t, $npl ); + $lastPrefix = $output = $lastLine = ''; + $this->mDTopen = $inBlockElem = false; + $prefixLength = 0; + $paragraphStack = false; + + if ( !$linestart ) { + $output .= array_shift( $textLines ); + } + foreach ( $textLines as $oLine ) { + $lastPrefixLength = strlen( $lastPrefix ); + $preCloseMatch = preg_match("/<\\/pre/i", $oLine ); + $preOpenMatch = preg_match("/
mInPre) {
+				$this->mInPre = !empty($preOpenMatch);
+			}
+			if ( !$this->mInPre ) {
+				# Multiple prefixes may abut each other for nested lists.
+				$prefixLength = strspn( $oLine, "*#:;" );
+				$pref = substr( $oLine, 0, $prefixLength );
+				
+				# eh?
+				$pref2 = str_replace( ";", ":", $pref );
+				$t = substr( $oLine, $prefixLength );
+			} else {
+				# Don't interpret any other prefixes in preformatted text
+				$prefixLength = 0;
+				$pref = $pref2 = '';
+				$t = $oLine;
+			}
 
-			if ( 0 != $npl && 0 == strcmp( $lastPref, $pref2 ) ) {
-				$text .= $this->nextItem( substr( $pref, -1 ) );
+			# List generation
+			if( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
+				# Same as the last item, so no need to deal with nesting or opening stuff
+				$output .= $this->nextItem( substr( $pref, -1 ) );
+				$paragraphStack = false;
 
 				if ( ";" == substr( $pref, -1 ) ) {
-					$cpos = strpos( $t, ":" );
-					if ( ! ( false === $cpos ) ) {
-						$term = substr( $t, 0, $cpos );
-						$text .= $term . $this->nextItem( ":" );
-						$t = substr( $t, $cpos + 1 );
+					# The one nasty exception: definition lists work like this:
+					# ; title : definition text
+					# So we check for : in the remainder text to split up the
+					# title and definition, without b0rking links.
+					# FIXME: This is not foolproof. Something better in Tokenizer might help.
+					if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+						$term = $match[1];
+						$output .= $term . $this->nextItem( ":" );
+						$t = $match[2];
 					}
 				}
-			} else if (0 != $npl || 0 != $opl) {
-				$cpl = $this->getCommon( $pref, $lastPref );
-
-				while ( $cpl < $opl ) {
-					$text .= $this->closeList( $lastPref{$opl-1} );
-					--$opl;
+			} elseif( $prefixLength || $lastPrefixLength ) {
+				# Either open or close a level...
+				$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );
+				$paragraphStack = false;
+
+				while( $commonPrefixLength < $lastPrefixLength ) {
+					$output .= $this->closeList( $lastPrefix{$lastPrefixLength-1} );
+					--$lastPrefixLength;
 				}
-				if ( $npl <= $cpl && $cpl > 0 ) {
-					$text .= $this->nextItem( $pref{$cpl-1} );
+				if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
+					$output .= $this->nextItem( $pref{$commonPrefixLength-1} );
 				}
-				while ( $npl > $cpl ) {
-					$char = substr( $pref, $cpl, 1 );
-					$text .= $this->openList( $char );
+				while ( $prefixLength > $commonPrefixLength ) {
+					$char = substr( $pref, $commonPrefixLength, 1 );
+					$output .= $this->openList( $char );
 
 					if ( ";" == $char ) {
-						$cpos = strpos( $t, ":" );
-						if ( ! ( false === $cpos ) ) {
-							$term = substr( $t, 0, $cpos );
-							$text .= $term . $this->nextItem( ":" );
-							$t = substr( $t, $cpos + 1 );
+						# FIXME: This is dupe of code above
+						if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) {
+							$term = $match[1];
+							$output .= $term . $this->nextItem( ":" );
+							$t = $match[2];
 						}
 					}
-					++$cpl;
+					++$commonPrefixLength;
 				}
-				$lastPref = $pref2;
+				$lastPrefix = $pref2;
 			}
-			if ( 0 == $npl ) { # No prefix--go to paragraph mode
+			if( 0 == $prefixLength ) {
+				# No prefix (not in list)--go to paragraph mode
 				$uniq_prefix = UNIQ_PREFIX;
 				// XXX: use a stack for nestable elements like span, table and div
-				$openmatch = preg_match("/(closeParagraph();
+					$paragraphStack = false;
+					$output .= $this->closeParagraph();
+					if($preOpenMatch and !$preCloseMatch) {
+						$this->mInPre = true;	
+					}
 					if ( $closematch  ) {
 						$inBlockElem = false;
 					} else {
@@ -1104,42 +1213,56 @@ class Parser
 					}
 				} else if ( !$inBlockElem ) {
 					if ( " " == $t{0} ) {
-						$newSection = "pre";
-						$text .= $this->closeParagraph();
-						$text .= "<" . $newSection . ">";
-						$this->mLastSection = $newSection;
-					} else { 
-						$newSection = "p";
+						// pre
+						if ($this->mLastSection != 'pre') {
+							$paragraphStack = false;
+							$output .= $this->closeParagraph().'
';
+							$this->mLastSection = 'pre';
+						}
+					} else {
+						// paragraph
 						if ( '' == trim($t) ) {
-							if ( '' == trim($lastLine) ) {
-								$text .= $this->closeParagraph();
-								$text .= "<" . $newSection . ">
"; - $this->mLastSection = $newSection; + if ( $paragraphStack ) { + $output .= $paragraphStack.'
'; + $paragraphStack = false; + $this->mLastSection = 'p'; } else { - $t = ''; + if ($this->mLastSection != 'p' ) { + $output .= $this->closeParagraph(); + $this->mLastSection = ''; + $paragraphStack = "

"; + } else { + $paragraphStack = '

'; + } } } else { - $text .= $this->closeParagraph(); - $text .= "<" . $newSection . ">"; - $this->mLastSection = $newSection; + if ( $paragraphStack ) { + $output .= $paragraphStack; + $paragraphStack = false; + $this->mLastSection = 'p'; + } else if ($this->mLastSection != 'p') { + $output .= $this->closeParagraph().'

'; + $this->mLastSection = 'p'; + } } } - - } + } + } + if ($paragraphStack === false) { + $output .= $t."\n"; } - $lastLine = $t; - $text .= $t; } - while ( $npl ) { - $text .= $this->closeList( $pref2{$npl-1} ); - --$npl; + while ( $prefixLength ) { + $output .= $this->closeList( $pref2{$prefixLength-1} ); + --$prefixLength; } if ( "" != $this->mLastSection ) { - $text .= "mLastSection . ">"; + $output .= "mLastSection . ">"; $this->mLastSection = ""; } + wfProfileOut( $fname ); - return $text; + return $output; } function getVariableValue( $index ) { @@ -1154,6 +1277,11 @@ class Parser return $wgLang->getMonthNameGen( date("n") ); case MAG_CURRENTDAY: return date("j"); + case MAG_PAGENAME: + return $this->mTitle->getText(); + case MAG_NAMESPACE: + # return Namespace::getCanonicalName($this->mTitle->getNamespace()); + return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch by Dori case MAG_CURRENTDAYNAME: return $wgLang->getWeekdayName( date("w")+1 ); case MAG_CURRENTYEAR: @@ -1181,67 +1309,30 @@ class Parser } } - /* private */ function replaceVariables( $text ) + /* private */ function replaceVariables( $text, $args = array() ) { - global $wgLang, $wgCurParser; - global $wgScript, $wgArticlePath; + global $wgLang, $wgScript, $wgArticlePath; $fname = "Parser::replaceVariables"; wfProfileIn( $fname ); - + $bail = false; if ( !$this->mVariables ) { $this->initialiseVariables(); } $titleChars = Title::legalChars(); - $regex = "/{{([$titleChars\\|]*?)}}/s"; - - # "Recursive" variable expansion: run it through a couple of passes - for ( $i=0; $ifork(); - - $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); - if ( $oldText == $text ) { - $bail = true; - } - $this->merge( $wgCurParser ); - } + $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s"; - return $text; - } + # This function is called recursively. 
To keep track of arguments we need a stack: + array_push( $this->mArgStack, $args ); - # Returns a copy of this object except with various variables cleared - # This copy can be re-merged with the parent after operations on the copy - function fork() - { - $copy = $this; - $copy->mOutput = new ParserOutput; - return $copy; - } + # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array + $GLOBALS['wgCurParser'] =& $this; + $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); - # Merges a copy split off with fork() - function merge( &$copy ) - { - # Output objects - $this->mOutput->merge( $copy->mOutput ); - - # Include throttling arrays - foreach( $copy->mIncludeCount as $dbk => $count ) { - if ( array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] += $count; - } else { - $this->mIncludeCount[$dbk] = $count; - } - } + array_pop( $this->mArgStack ); - # Strip states - foreach( $copy->mStripState as $dictName => $contentDict ) { - $this->mStripState[$dictName] += $contentDict; - } + return $text; } function braceSubstitution( $matches ) @@ -1250,12 +1341,25 @@ class Parser $fname = "Parser::braceSubstitution"; $found = false; $nowiki = false; - - $text = $matches[1]; + $title = NULL; + + # $newline is an optional newline character before the braces + # $part1 is the bit before the first |, and must contain only title characters + # $args is a list of arguments, starting from index 0, not including $part1 + + $newline = $matches[1]; + $part1 = $matches[2]; + # If the third subpattern matched anything, it will start with | + if ( $matches[3] !== "" ) { + $args = explode( "|", substr( $matches[3], 1 ) ); + } else { + $args = array(); + } + $argc = count( $args ); # SUBST $mwSubst =& MagicWord::get( MAG_SUBST ); - if ( $mwSubst->matchStartAndRemove( $text ) ) { + if ( $mwSubst->matchStartAndRemove( $part1 ) ) { if ( $this->mOutputType != OT_WIKI ) { # Invalid SUBST not replaced at PST time # Return without 
further processing @@ -1267,37 +1371,39 @@ class Parser $text = $matches[0]; $found = true; } - + # MSG, MSGNW and INT if ( !$found ) { # Check for MSGNW: $mwMsgnw =& MagicWord::get( MAG_MSGNW ); - if ( $mwMsgnw->matchStartAndRemove( $text ) ) { + if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { $nowiki = true; } else { # Remove obsolete MSG: $mwMsg =& MagicWord::get( MAG_MSG ); - $mwMsg->matchStartAndRemove( $text ); + $mwMsg->matchStartAndRemove( $part1 ); } - + # Check if it is an internal message $mwInt =& MagicWord::get( MAG_INT ); - if ( $mwInt->matchStartAndRemove( $text ) ) { - $text = wfMsg( $text ); - $found = true; + if ( $mwInt->matchStartAndRemove( $part1 ) ) { + if ( $this->incrementIncludeCount( "int:$part1" ) ) { + $text = wfMsgReal( $part1, $args, true ); + $found = true; + } } } - + # NS if ( !$found ) { # Check for NS: (namespace expansion) $mwNs = MagicWord::get( MAG_NS ); - if ( $mwNs->matchStartAndRemove( $text ) ) { - if ( intval( $text ) ) { - $text = $wgLang->getNsText( intval( $text ) ); + if ( $mwNs->matchStartAndRemove( $part1 ) ) { + if ( intval( $part1 ) ) { + $text = $wgLang->getNsText( intval( $part1 ) ); $found = true; } else { - $index = Namespace::getCanonicalIndex( strtolower( $text ) ); + $index = Namespace::getCanonicalIndex( strtolower( $part1 ) ); if ( !is_null( $index ) ) { $text = $wgLang->getNsText( $index ); $found = true; @@ -1305,86 +1411,62 @@ class Parser } } } - + # LOCALURL and LOCALURLE if ( !$found ) { $mwLocal = MagicWord::get( MAG_LOCALURL ); $mwLocalE = MagicWord::get( MAG_LOCALURLE ); - if ( $mwLocal->matchStartAndRemove( $text ) ) { + if ( $mwLocal->matchStartAndRemove( $part1 ) ) { $func = 'getLocalURL'; - } elseif ( $mwLocalE->matchStartAndRemove( $text ) ) { + } elseif ( $mwLocalE->matchStartAndRemove( $part1 ) ) { $func = 'escapeLocalURL'; } else { $func = ''; } - + if ( $func !== '' ) { - $args = explode( "|", $text ); - $n = count( $args ); - if ( $n > 0 ) { - $title = Title::newFromText( $args[0] ); - 
if ( !is_null( $title ) ) { - if ( $n > 1 ) { - $text = $title->$func( $args[1] ); - } else { - $text = $title->$func(); - } - $found = true; + $title = Title::newFromText( $part1 ); + if ( !is_null( $title ) ) { + if ( $argc > 0 ) { + $text = $title->$func( $args[0] ); + } else { + $text = $title->$func(); } + $found = true; } - } + } } - - # Check for a match against internal variables - if ( !$found && array_key_exists( $text, $this->mVariables ) ) { - $text = $this->mVariables[$text]; + + # Internal variables + if ( !$found && array_key_exists( $part1, $this->mVariables ) ) { + $text = $this->mVariables[$part1]; $found = true; $this->mOutput->mContainsOldMagic = true; - } - + } + + # Arguments input from the caller + $inputArgs = end( $this->mArgStack ); + if ( !$found && array_key_exists( $part1, $inputArgs ) ) { + $text = $inputArgs[$part1]; + $found = true; + } + # Load from database if ( !$found ) { - $title = Title::newFromText( $text, NS_TEMPLATE ); - if ( is_object( $title ) && !$title->isExternal() ) { + $title = Title::newFromText( $part1, NS_TEMPLATE ); + if ( !is_null( $title ) && !$title->isExternal() ) { # Check for excessive inclusion $dbk = $title->getPrefixedDBkey(); - if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { - $this->mIncludeCount[$dbk] = 0; - } - if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + if ( $this->incrementIncludeCount( $dbk ) ) { $article = new Article( $title ); $articleContent = $article->getContentWithoutUsingSoManyDamnGlobals(); if ( $articleContent !== false ) { $found = true; $text = $articleContent; - - # Escaping and link table handling - # Not required for preSaveTransform() - if ( $this->mOutputType == OT_HTML ) { - if ( $nowiki ) { - $text = wfEscapeWikiText( $text ); - } else { - $text = $this->removeHTMLtags( $text ); - } - # Do not enter included links in link table - $wgLinkCache->suspend(); - - # Run full parser on the included text - $text = $this->strip( $text, $this->mStripState ); - 
$text = $this->doWikiPass2( $text, true ); - - # Add the result to the strip state for re-inclusion after - # the rest of the processing - $text = $this->insertStripItem( $text, $this->mStripState ); - - # Resume the link cache and register the inclusion as a link - $wgLinkCache->resume(); - $wgLinkCache->addLinkObj( $title ); - } - } - } + } + } # If the title is valid but undisplayable, make a link to it if ( $this->mOutputType == OT_HTML && !$found ) { @@ -1394,6 +1476,50 @@ class Parser } } + # Recursive parsing, escaping and link table handling + # Only for HTML output + if ( $nowiki && $found && $this->mOutputType == OT_HTML ) { + $text = wfEscapeWikiText( $text ); + } elseif ( $this->mOutputType == OT_HTML && $found ) { + # Clean up argument array + $assocArgs = array(); + $index = 1; + foreach( $args as $arg ) { + $eqpos = strpos( $arg, "=" ); + if ( $eqpos === false ) { + $assocArgs[$index++] = $arg; + } else { + $name = trim( substr( $arg, 0, $eqpos ) ); + $value = trim( substr( $arg, $eqpos+1 ) ); + if ( $value === false ) { + $value = ""; + } + if ( $name !== false ) { + $assocArgs[$name] = $value; + } + } + } + + # Do not enter included links in link table + if ( !is_null( $title ) ) { + $wgLinkCache->suspend(); + } + + # Run full parser on the included text + $text = $this->strip( $text, $this->mStripState ); + $text = $this->internalParse( $text, (bool)$newline, $assocArgs ); + + # Add the result to the strip state for re-inclusion after + # the rest of the processing + $text = $this->insertStripItem( $text, $this->mStripState ); + + # Resume the link cache and register the inclusion as a link + if ( !is_null( $title ) ) { + $wgLinkCache->resume(); + $wgLinkCache->addLinkObj( $title ); + } + } + if ( !$found ) { return $matches[0]; } else { @@ -1401,101 +1527,140 @@ class Parser } } + # Returns true if the function is allowed to include this entity + function incrementIncludeCount( $dbk ) + { + if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { 
+ $this->mIncludeCount[$dbk] = 0; + } + if ( ++$this->mIncludeCount[$dbk] <= MAX_INCLUDE_REPEAT ) { + return true; + } else { + return false; + } + } + + # Cleans up HTML, removes dangerous tags and attributes /* private */ function removeHTMLtags( $text ) { + global $wgUseTidy, $wgUserHtml; $fname = "Parser::removeHTMLtags"; wfProfileIn( $fname ); - $htmlpairs = array( # Tags that must be closed - "b", "i", "u", "font", "big", "small", "sub", "sup", "h1", - "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", - "strike", "strong", "tt", "var", "div", "center", - "blockquote", "ol", "ul", "dl", "table", "caption", "pre", - "ruby", "rt" , "rb" , "rp", "p" - ); - $htmlsingle = array( - "br", "hr", "li", "dt", "dd" - ); - $htmlnest = array( # Tags that can be nested--?? - "table", "tr", "td", "th", "div", "blockquote", "ol", "ul", - "dl", "font", "big", "small", "sub", "sup" - ); - $tabletags = array( # Can only appear inside table - "td", "th", "tr" - ); + + if( $wgUserHtml ) { + $htmlpairs = array( # Tags that must be closed + "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1", + "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", + "strike", "strong", "tt", "var", "div", "center", + "blockquote", "ol", "ul", "dl", "table", "caption", "pre", + "ruby", "rt" , "rb" , "rp", "p" + ); + $htmlsingle = array( + "br", "hr", "li", "dt", "dd" + ); + $htmlnest = array( # Tags that can be nested--?? 
+ "table", "tr", "td", "th", "div", "blockquote", "ol", "ul", + "dl", "font", "big", "small", "sub", "sup" + ); + $tabletags = array( # Can only appear inside table + "td", "th", "tr" + ); + } else { + $htmlpairs = array(); + $htmlsingle = array(); + $htmlnest = array(); + $tabletags = array(); + } $htmlsingle = array_merge( $tabletags, $htmlsingle ); $htmlelements = array_merge( $htmlsingle, $htmlpairs ); - $htmlattrs = $this->getHTMLattrs () ; + $htmlattrs = $this->getHTMLattrs () ; # Remove HTML comments - $text = preg_replace( "//sU", "", $text ); + $text = preg_replace( "/(\\n * *(?=\\n)|)/sU", "$2", $text ); $bits = explode( "<", $text ); $text = array_shift( $bits ); - $tagstack = array(); $tablestack = array(); - - foreach ( $bits as $x ) { - $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", - $x, $regs ); - list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; - error_reporting( $prev ); - - $badtag = 0 ; - if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { - # Check our stack - if ( $slash ) { - # Closing a tag... - if ( ! in_array( $t, $htmlsingle ) && - ( $ot = array_pop( $tagstack ) ) != $t ) { - array_push( $tagstack, $ot ); - $badtag = 1; + if(!$wgUseTidy) { + $tagstack = array(); $tablestack = array(); + foreach ( $bits as $x ) { + $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); + preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + $x, $regs ); + list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; + error_reporting( $prev ); + + $badtag = 0 ; + if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + # Check our stack + if ( $slash ) { + # Closing a tag... + if ( ! 
in_array( $t, $htmlsingle ) && + ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) { + if(!empty($ot)) array_push( $tagstack, $ot ); + $badtag = 1; + } else { + if ( $t == "table" ) { + $tagstack = array_pop( $tablestack ); + } + $newparams = ""; + } } else { - if ( $t == "table" ) { - $tagstack = array_pop( $tablestack ); + # Keep track for later + if ( in_array( $t, $tabletags ) && + ! in_array( "table", $tagstack ) ) { + $badtag = 1; + } else if ( in_array( $t, $tagstack ) && + ! in_array ( $t , $htmlnest ) ) { + $badtag = 1 ; + } else if ( ! in_array( $t, $htmlsingle ) ) { + if ( $t == "table" ) { + array_push( $tablestack, $tagstack ); + $tagstack = array(); + } + array_push( $tagstack, $t ); } - $newparams = ""; + # Strip non-approved attributes from the tag + $newparams = $this->fixTagAttributes($params); + } - } else { - # Keep track for later - if ( in_array( $t, $tabletags ) && - ! in_array( "table", $tagstack ) ) { - $badtag = 1; - } else if ( in_array( $t, $tagstack ) && - ! in_array ( $t , $htmlnest ) ) { - $badtag = 1 ; - } else if ( ! in_array( $t, $htmlsingle ) ) { - if ( $t == "table" ) { - array_push( $tablestack, $tagstack ); - $tagstack = array(); - } - array_push( $tagstack, $t ); + if ( ! $badtag ) { + $rest = str_replace( ">", ">", $rest ); + $text .= "<$slash$t $newparams$brace$rest"; + continue; } - # Strip non-approved attributes from the tag - $newparams = $this->fixTagAttributes($params); - } - if ( ! $badtag ) { + $text .= "<" . 
str_replace( ">", ">", $x); + } + # Close off any remaining tags + while ( $t = array_pop( $tagstack ) ) { + $text .= "\n"; + if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); } + } + } else { + # this might be possible using tidy itself + foreach ( $bits as $x ) { + preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + $x, $regs ); + @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; + if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { + $newparams = $this->fixTagAttributes($params); $rest = str_replace( ">", ">", $rest ); $text .= "<$slash$t $newparams$brace$rest"; - continue; + } else { + $text .= "<" . str_replace( ">", ">", $x); } - } - $text .= "<" . str_replace( ">", ">", $x); - } - # Close off any remaining tags - while ( $t = array_pop( $tagstack ) ) { - $text .= "\n"; - if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); } + } } wfProfileOut( $fname ); return $text; } -/* - * + +/* + * * This function accomplishes several tasks: * 1) Auto-number headings if that option is enabled * 2) Add an [edit] link to sections for logged in users who have enabled the option @@ -1504,11 +1669,13 @@ class Parser * * It loops through all headlines, collects the necessary data, then splits up the * string and re-inserts the newly formatted headlines. 
- * + * */ /* private */ function formatHeadings( $text ) { + global $wgInputEncoding; + $doNumberHeadings = $this->mOptions->getNumberHeadings(); $doShowToc = $this->mOptions->getShowToc(); if( !$this->mTitle->userCanEdit() ) { @@ -1524,7 +1691,7 @@ class Parser if( $esw->matchAndRemove( $text ) ) { $showEditLink = 0; } - # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, + # if the string __NOTOC__ (not case-sensitive) occurs in the HTML, # do not add TOC $mw =& MagicWord::get( MAG_NOTOC ); if( $mw->matchAndRemove( $text ) ) { @@ -1575,12 +1742,12 @@ class Parser $prevlevel = $level; } $level = $matches[1][$headlineCount]; - if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { + if( ( $doNumberHeadings || $doShowToc ) && $prevlevel && $level > $prevlevel ) { # reset when we enter a new level $sublevelCount[$level] = 0; $toc .= $sk->tocIndent( $level - $prevlevel ); $toclevel += $level - $prevlevel; - } + } if( ( $doNumberHeadings || $doShowToc ) && $level < $prevlevel ) { # reset when we step back a level $sublevelCount[$level+1]=0; @@ -1597,37 +1764,37 @@ class Parser $numbering .= "."; } $numbering .= $sublevelCount[$i]; - $dot = 1; + $dot = 1; } } } # The canonized header is a version of the header text safe to use for links # Avoid insertion of weird stuff like by expanding the relevant sections - $canonized_headline = Parser::unstrip( $headline, $this->mStripState ); - + $canonized_headline = $this->unstrip( $headline, $this->mStripState ); + # strip out HTML $canonized_headline = preg_replace( "/<.*?" . 
">/","",$canonized_headline ); - $tocline = trim( $canonized_headline ); - $canonized_headline = preg_replace("/[ &\\/<>\\(\\)\\[\\]=,+']+/", '_', html_entity_decode( $tocline)); + $tocline = trim( $canonized_headline ); + $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) ); $refer[$headlineCount] = $canonized_headline; - + # count how many in assoc. array so we can track dupes in anchors @$refers[$canonized_headline]++; $refcount[$headlineCount]=$refers[$canonized_headline]; # Prepend the number to the heading text - + if( $doNumberHeadings || $doShowToc ) { $tocline = $numbering . " " . $tocline; - + # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { # the two are different if the line contains a link $headline=$numbering . " " . $headline; } } - + # Create the anchor for linking from the TOC to the section $anchor = $canonized_headline; if($refcount[$headlineCount] > 1 ) { @@ -1642,17 +1809,17 @@ class Parser } $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1); } - + # Add the edit section span if( $rightClickHack ) { - $headline = $sk->editSectionScript($headlineCount+1,$headline); + $headline = $sk->editSectionScript($headlineCount+1,$headline); } # give headline the correct tag @$head[$headlineCount] .= ""; - + $headlineCount++; - } + } if( $doShowToc ) { $toclines = $headlineCount; @@ -1661,15 +1828,18 @@ class Parser } # split up and insert constructed headlines - + $blocks = preg_split( "/.*?<\/H[1-6]>/i", $text ); $i = 0; foreach( $blocks as $block ) { if( $showEditLink && $headlineCount > 0 && $i == 0 && $block != "\n" ) { - # This is the [edit] link that appears for the top block of text when + # This is the [edit] link that appears for the top block of text when # section editing is enabled - $full .= $sk->editSectionLink(0); + + # Disabled because it broke block formatting 
+ # For example, a bullet point in the top line + # $full .= $sk->editSectionLink(0); } $full .= $block; if( $doShowToc && !$i) { @@ -1682,7 +1852,7 @@ class Parser } $i++; } - + return $full; } @@ -1716,7 +1886,7 @@ class Parser } $num = str_replace( "-", "", $isbn ); $num = str_replace( " ", "", $num ); - + if ( "" == $num ) { $text = "ISBN $blank$x"; } else { @@ -1759,7 +1929,7 @@ class Parser $rfc .= $x{0}; $x = substr( $x, 1 ); } - + if ( "" == $rfc ) { $text .= "RFC $blank$x"; } else { @@ -1780,11 +1950,11 @@ class Parser $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_WIKI; - + if ( $clearState ) { $this->clearState(); } - + $stripState = false; $pairs = array( "\r\n" => "\n", @@ -1855,16 +2025,16 @@ class Parser } else { $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text ); } - + /* $mw =& MagicWord::get( MAG_SUBST ); $wgCurParser = $this->fork(); $text = $mw->substituteCallback( $text, "wfBraceSubstitution" ); $this->merge( $wgCurParser ); */ - + # Trim trailing whitespace - # MAG_END (__END__) tag allows for trailing + # MAG_END (__END__) tag allows for trailing # whitespace to be deliberately included $text = rtrim( $text ); $mw =& MagicWord::get( MAG_END ); @@ -1875,7 +2045,7 @@ class Parser # Set up some variables which are usually set up in parse() # so that an external function can call some class members with confidence - function startExternalParse( &$title, $options, $outputType, $clearState = true ) + function startExternalParse( &$title, $options, $outputType, $clearState = true ) { $this->mTitle =& $title; $this->mOptions = $options; @@ -1888,7 +2058,7 @@ class Parser function transformMsg( $text, $options ) { global $wgTitle; static $executing = false; - + # Guard against infinite recursion if ( $executing ) { return $text; @@ -1900,7 +2070,7 @@ class Parser $this->mOutputType = OT_MSG; $this->clearState(); $text = $this->replaceVariables( $text ); - + $executing = false; return $text; } @@ -1975,17 
+2145,17 @@ class ParserOptions function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); } - /* static */ function newFromUser( &$user ) + /* static */ function newFromUser( &$user ) { $popts = new ParserOptions; - $popts->initialiseFromUser( &$user ); + $popts->initialiseFromUser( $user ); return $popts; } - function initialiseFromUser( &$userInput ) + function initialiseFromUser( &$userInput ) { global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; - + if ( !$userInput ) { $user = new User; $user->setLoaded( true );