2 require_once ( "Parser.php" ) ;
5 * This should one day become the XML->(X)HTML parser
6 * Based on work by Jan Hidders and Magnus Manske
8 * $wgUseXMLparser = true ;
9 * $wgEnableParserCache = false ;
10 * $wgWiki2xml to the path and executable of the command line version (cli)
11 * in LocalSettings.php
13 * @subpackage Experimental
17 * the base class for an element
22 var $children = array();
25 * This finds the ATTRS element and returns the ATTR sub-children as a single string
/**
 * Returns the attributes given in the source for this element as one string.
 *
 * Scans the direct children for "ATTRS" container elements and renders the
 * ATTR entries they hold. If several ATTRS children exist, the last one wins.
 *
 * @return string Space-separated name='value' pairs, or "" without an ATTRS child
 */
function getSourceAttrs ()
{
	$ret = "" ;
	foreach ( $this->children as $child )
	{
		# Text nodes are plain strings and cannot be an ATTRS container
		if ( is_string ( $child ) ) continue ;
		if ( $child->name != "ATTRS" ) continue ;

		# makeXHTML() on an ATTRS element does nothing but return
		# getTheseAttrs(). Calling that directly avoids handing the
		# undefined local $parser to makeXHTML(), as this parameterless
		# method used to do.
		$ret = $child->getTheseAttrs () ;
	}
	return $ret ;
}
41 * This collects the ATTR thingies for getSourceAttrs()
/**
 * Collects the ATTR children of this (ATTRS) element for getSourceAttrs().
 *
 * @return string Space-separated name='value' pairs; "" when there is no ATTR child
 */
function getTheseAttrs ()
{
	$ret = array() ;
	foreach ( $this->children as $child )
	{
		if ( is_string ( $child ) OR $child->name != "ATTR" ) continue ;

		# An attribute with an empty value has no text child at all, because
		# wgXMLcharacterData() never stores blank data, so children[0] cannot
		# be relied on. Join whatever string children there are instead.
		$value = "" ;
		foreach ( $child->children as $part )
		{
			if ( is_string ( $part ) ) $value .= $part ;
		}

		# The value is placed inside a single-quoted attribute, so quotes and
		# markup characters must be escaped to keep the tag well-formed.
		$ret[] = $child->attrs["NAME"] . "='" . htmlspecialchars ( $value , ENT_QUOTES ) . "'" ;
	}
	return implode ( " " , $ret ) ;
}
# Looks at the child at index $key and the child at index $k2 and, when the first is a
# LINK element and the second a string, hands text from that string over to the link.
# $parser is only passed on to sub_makeXHTML().
# NOTE(review): $k2 and $s are assigned on lines not visible here - presumably $k2 is
# $key + 1 and $s collects the characters stripped from $n; confirm.
# The only reference to this method in view is commented out (in sub_makeXHTML).
56 function fixLinkTails ( &$parser , $key )
# Guard clauses: children[$k2] must exist and be a string, children[$key] must be a LINK element
59 if ( !isset ( $this->children
[$k2] ) ) return ;
60 if ( !is_string ( $this->children
[$k2]) ) return ;
61 if ( is_string ( $this->children
[$key]) ) return ;
62 if ( $this->children
[$key]->name
!= "LINK" ) return ;
# $n is the text of the string child; its first character is tested below
64 $n = $this->children
[$k2] ;
# Accepted characters: a-z plus the German letters.
# NOTE(review): $n[0] is a single byte; if this file and the text are UTF-8, the
# comparisons with 'ä', 'ö', 'ü' and 'ß' can presumably never match - confirm encoding.
67 ( ( $n[0] >= 'a' AND $n[0] <= 'z' ) OR
68 $n[0] == 'ä' OR $n[0] == 'ö' OR
69 $n[0] == 'ü' OR $n[0] == 'ß' ) )
# Drop the first character of $n
72 $n = substr ( $n , 1 ) ;
# Write the remaining text back to the string child
74 $this->children
[$k2] = $n ;
# The link has more than one child: append $s to the children of its last child
76 if ( count ( $this->children
[$key]->children
) > 1 )
78 $kl = array_keys ( $this->children
[$key]->children
) ;
79 $kl = array_pop ( $kl ) ;
80 $this->children
[$key]->children
[$kl]->children
[] = $s ;
# Otherwise $e gets the name LINKOPTION and, as its text, the link's rendered content
# followed by $s; it is then appended to the link's children.
# ($e is created on a line not visible here.)
85 $e->name
= "LINKOPTION" ;
86 $t = $this->children
[$key]->sub_makeXHTML ( $parser ) ;
87 $e->children
[] = trim ( $t ) . $s ;
88 $this->children
[$key]->children
[] = $e ;
93 * This function generates the XHTML for the entire subtree
# Builds the XHTML for the children of this element.
# $parser is passed on to every child's makeXHTML(); $tag is an optional tag name used
# for the closing tag written at the end; $attr is optional attribute text for that tag.
# NOTE(review): the statements that open the tag, append string children and return the
# result are on lines not visible here.
95 function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" )
# Attributes taken from the source (ATTRS child), as a single string
99 $attr2 = $this->getSourceAttrs () ;
# Put a blank between caller-supplied and source attributes when both are present
100 if ( $attr != "" AND $attr2 != "" ) $attr .= " " ;
# Attribute text, if any, follows a blank
106 if ( $attr != "" ) $ret .= " " . $attr ;
# Link-tail fixing is disabled here
110 # foreach ( array_keys ( $this->children ) AS $x )
111 # $this->fixLinkTails ( $parser , $x ) ;
# Children are either plain strings or elements; ATTRS elements are left out of the
# output here, every other element child is rendered through makeXHTML()
113 foreach ($this->children
as $key => $child) {
114 if ( is_string($child) ) {
116 } else if ( $child->name
!= "ATTRS" ) {
117 $ret .= $child->makeXHTML ( $parser );
# Closing tag followed by a newline
121 $ret .= "</" . $tag . ">\n" ;
# Produces the link HTML for an internal link.
# $target is the link target text, $display_title the text to show, $options an array
# of further link options. $parser is not used in the lines visible here.
# NOTE(review): $wgUser is presumably declared global on a line not visible here.
128 function createInternalLink ( &$parser , $target , $display_title , $options )
131 $skin = $wgUser->getSkin() ;
132 $tp = explode ( ":" , $target ) ; # tp = target parts
133 $title = "" ; # The plain title
134 $language = "" ; # The language/meta/etc. part
135 $namespace = "" ; # The namespace, if any
136 $subtarget = "" ; # The '#' thingy
139 $nt = Title
::newFromText ( $target ) ;
# $fl is true when the FORCEDLINK attribute equals "YES" (case-insensitive)
140 $fl = strtoupper ( $this->attrs
["FORCEDLINK"] ) == "YES" ;
142 if ( $fl ||
count ( $tp ) == 1 ) $title = $target ; # Plain and simple case
143 else # There's stuff missing here...
# Image namespace: the display title becomes the last option and the skin renders
# an image link with all options joined by "|"
145 if ( $nt->getNamespace() == NS_IMAGE
)
147 $options[] = $display_title ;
148 return $skin->makeImageLinkObj ( $nt , implode ( "|" , $options ) ) ;
150 else $title = $target ; # Default
# $language and $namespace are only ever set to "" in the visible lines, so the two
# placeholder returns below look unreachable from here - confirm against the full file
153 if ( $language != "" ) # External link within the WikiMedia project
155 return "{language link}" ;
157 else if ( $namespace != "" ) # Link to another namespace, check for image/media stuff
159 return "{namespace link}" ;
# Ordinary link rendered by the skin
163 return $skin->makeLink ( $target , $display_title ) ;
# Renders a LINK element: gathers target and options from the element children and
# hands them to createInternalLink().
# NOTE(review): $target and $option are presumably initialised on lines not visible here.
167 function makeInternalLink ( &$parser )
171 foreach ($this->children
as $child) {
172 if ( is_string($child) ) {
173 # This shouldn't be the case!
# The rendered, trimmed LINKTARGET child is the target ...
175 if ( $child->name
== "LINKTARGET" )
176 $target = trim ( $child->makeXHTML ( $parser ) ) ;
# ... other children are collected as options (presumably the else branch - confirm)
178 $option[] = trim ( $child->makeXHTML ( $parser ) ) ;
# The last option serves as display title; without options the target is shown
182 if ( count ( $option ) == 0 ) $option[] = $target ; # Create dummy display title
183 $display_title = array_pop ( $option ) ;
184 return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
# Renders a template inclusion.
# $title is the template title as written, $parts the remaining "|"-separated parts of
# the template call, $parser the parser whose plain_parse() renders the template text.
# NOTE(review): what is returned after plain_parse() is on lines not visible here.
187 function getTemplateXHTML ( $title , $parts , &$parser ) {
188 global $wgLang , $wgUser ;
189 $skin = $wgUser->getSkin() ;
190 $ot = $title ; # Original title
# A title without ":" gets the template namespace name prepended
191 if ( count ( explode ( ":" , $title ) ) == 1 )
192 $title = $wgLang->getNsText ( NS_TEMPLATE
) . ":" . $title ;
193 $nt = Title
::newFromText ( $title ) ;
194 $id = $nt->getArticleID() ;
195 if ( $id == 0 ) { # No/non-existing page
# Broken link to the full title, labelled with the title as originally written
196 return $skin->makeBrokenLink ( $title , $ot ) ;
200 $tv = array () ; # Template variables
201 foreach ( $parts AS $part ) {
# Split each part at the first "=" only
203 $x = explode ( "=" , $part , 2 ) ;
# A part without "=" is keyed by $a
# NOTE(review): $a is not assigned in the visible lines - presumably a running counter
204 if ( count ( $x ) == 1 ) $key = "{$a}" ;
206 $value = array_pop ( $x ) ;
# Load the template text and parse it in inline mode with the collected variables
209 $art = new Article ( $nt ) ;
210 $text = $art->getContent ( false ) ;
211 $parser->plain_parse ( $text , true , $tv ) ;
217 * This function actually converts wikiXML into XHTML tags
# Converts this element to XHTML by dispatching on its name.
# $parser carries rendering state that is read and written here (nowiki,
# mCurrentTemplateOptions, mListType) and is passed on to all helper methods.
# NOTE(review): initialisation and return of $ret are on lines not visible here.
219 function makeXHTML ( &$parser )
222 $n = $this->name
; # Shortcut
# EXTENSION elements whose NAME is a known HTML tag are mapped onto the internal names
224 if ( $n == "EXTENSION" ) # Fix allowed HTML
227 $ext = strtoupper ( $this->attrs
["NAME"] ) ;
228 if ( $ext == "B" ||
$ext == "STRONG" ) $n = "BOLD" ;
229 else if ( $ext == "I" ||
$ext == "EM" ) $n = "ITALICS" ;
230 else if ( $ext == "U" ) $n = "UNDERLINED" ; # Hey, virtual wiki tag! ;-)
231 else if ( $ext == "S" ) $n = "STRIKE" ;
232 else if ( $ext == "P" ) $n = "PARAGRAPH" ;
233 else if ( $ext == "TABLE" ) $n = "TABLE" ;
234 else if ( $ext == "TR" ) $n = "TABLEROW" ;
235 else if ( $ext == "TD" ) $n = "TABLECELL" ;
236 else if ( $ext == "TH" ) $n = "TABLEHEAD" ;
237 else if ( $ext == "CAPTION" ) $n = "CAPTION" ;
238 else if ( $ext == "NOWIKI" ) $n = "NOWIKI" ;
# NOTE(review): $old_n is assigned on a line not visible here - presumably the value
# of $n before the mapping above
239 if ( $n != $old_n ) unset ( $this->attrs
["NAME"] ) ; # Cleanup
240 else if ( $parser->nowiki
> 0 ) $n = "" ; # No "real" wiki tags allowed in nowiki section
# Block and inline formatting: the children are wrapped in the matching XHTML tag
243 if ( $n == "ARTICLE" )
244 $ret .= $this->sub_makeXHTML ( $parser ) ;
245 else if ( $n == "HEADING" )
# The LEVEL attribute selects the heading tag: "h" followed by the level
246 $ret .= $this->sub_makeXHTML ( $parser , "h" . $this->attrs
["LEVEL"] ) ;
247 else if ( $n == "PARAGRAPH" )
248 $ret .= $this->sub_makeXHTML ( $parser , "p" ) ;
249 else if ( $n == "BOLD" )
250 $ret .= $this->sub_makeXHTML ( $parser , "strong" ) ;
251 else if ( $n == "ITALICS" )
252 $ret .= $this->sub_makeXHTML ( $parser , "em" ) ;
254 # These don't exist as wiki markup
255 else if ( $n == "UNDERLINED" )
256 $ret .= $this->sub_makeXHTML ( $parser , "u" ) ;
257 else if ( $n == "STRIKE" )
258 $ret .= $this->sub_makeXHTML ( $parser , "strike" ) ;
261 else if ( $n == "COMMENT" )
262 $ret .= "" ; # Comments are parsed out
# Links: LINK is rendered as a whole, its parts are rendered as plain content
265 else if ( $n == "LINK" )
266 $ret .= $this->makeInternalLink ( $parser ) ;
267 else if ( $n == "LINKTARGET" )
268 $ret .= $this->sub_makeXHTML ( $parser ) ;
269 else if ( $n == "LINKOPTION" )
270 $ret .= $this->sub_makeXHTML ( $parser ) ;
# Templates: the rendered content is split at "|"; the first piece is the title
272 else if ( $n == "TEMPLATE" )
274 $parts = $this->sub_makeXHTML ( $parser ) ;
275 $parts = explode ( "|" , $parts ) ;
276 $title = array_shift ( $parts ) ;
# NOTE(review): "&$parser" in a call is call-time pass-by-reference, which PHP 5.4 and
# later reject with a fatal error; getTemplateXHTML() already declares the parameter
# by reference, so the "&" here looks removable - confirm target PHP version.
277 $ret .= $this->getTemplateXHTML ( $title , $parts , &$parser ) ;
# Template variable: replaced by the matching entry of the current template options,
# if one is set
279 else if ( $n == "TEMPLATEVAR" )
281 $x = $this->sub_makeXHTML ( $parser ) ;
282 if ( isset ( $parser->mCurrentTemplateOptions
["{$x}"] ) )
283 $ret .= $parser->mCurrentTemplateOptions
["{$x}"] ;
286 else if ( $n == "IGNORE" ) # Internal use, not generated by wiki2xml parser
287 $ret .= $this->sub_makeXHTML ( $parser ) ;
289 else if ( $n == "NOWIKI" )
292 $ret .= $this->sub_makeXHTML ( $parser , "" ) ;
296 # Unknown HTML extension
297 else if ( $n == "EXTENSION" ) # This is currently a dummy!!!
299 $ext = $this->attrs
["NAME"] ;
# The extension name is written out as an opening and closing tag around the content
301 $ret .= "<" . $ext . ">" ;
302 $ret .= $this->sub_makeXHTML ( $parser ) ;
303 $ret .= "</" . $ext . "> " ;
# Table markup
307 else if ( $n == "TABLE" )
309 $ret .= $this->sub_makeXHTML ( $parser , "table" ) ;
311 else if ( $n == "TABLEROW" )
313 $ret .= $this->sub_makeXHTML ( $parser , "tr" ) ;
315 else if ( $n == "TABLECELL" )
317 $ret .= $this->sub_makeXHTML ( $parser , "td" ) ;
319 else if ( $n == "TABLEHEAD" )
321 $ret .= $this->sub_makeXHTML ( $parser , "th" ) ;
323 else if ( $n == "CAPTION" )
325 $ret .= $this->sub_makeXHTML ( $parser , "caption" ) ;
328 else if ( $n == "ATTRS" ) # SPECIAL CASE : returning attributes
330 return $this->getTheseAttrs () ;
# Lists: an item becomes "dd" inside a "dl" list and "li" otherwise
334 else if ( $n == "LISTITEM" )
336 if ( $parser->mListType
== "dl" ) $ret .= $this->sub_makeXHTML ( $parser , "dd" ) ;
337 else $ret .= $this->sub_makeXHTML ( $parser , "li" ) ;
339 else if ( $n == "LIST" )
341 $type = "ol" ; # Default
342 if ( $this->attrs
["TYPE"] == "bullet" ) $type = "ul" ;
343 else if ( $this->attrs
["TYPE"] == "indent" ) $type = "dl" ;
# The list type is set on the parser while the items are rendered and restored
# afterwards, so nested lists do not disturb the outer list
344 $oldtype = $parser->mListType
;
345 $parser->mListType
= $type ;
346 $ret .= $this->sub_makeXHTML ( $parser , $type ) ;
347 $parser->mListType
= $oldtype ;
350 # Something else entirely
# The element name itself is written out as a tag around the content
353 $ret .= "<" . $n . ">" ;
354 $ret .= $this->sub_makeXHTML ( $parser ) ;
355 $ret .= "</" . $n . "> " ;
# Surround the result with newlines, then replace each "\n\n" with a single "\n"
358 $ret = "\n{$ret}\n" ;
359 $ret = str_replace ( "\n\n" , "\n" , $ret ) ;
364 * A function for additional debugging output
# Debug dump of this element as HTML list items: name, attributes, then all children.
# NOTE(review): the function header is on a line not visible here; the recursive call
# below suggests the method is myPrint() - confirm.
368 $ret .= "<li> <b> Name: </b> $this->name </li>\n";
370 $ret .= '<li> <b> Attributes: </b>';
# Each attribute is written as "name => value; "
371 foreach ($this->attrs
as $name => $value) {
372 $ret .= "$name => $value; " ;
# String children are printed as they are, element children recurse
376 foreach ($this->children
as $child) {
377 if ( is_string($child) ) {
378 $ret .= "<li> $child </li>\n";
380 $ret .= $child->myPrint();
# Shared between the XML handler functions and the scan methods, which access it as a global
388 $ancStack = array(); // the stack with ancestral elements
390 // Three global functions needed for parsing, sorry guys
// Start-tag handler (registered through xml_set_element_handler in scanFile/scanString).
// Creates an element for the tag, stores its name and attributes and pushes it on $ancStack.
// $parser is the XML parser resource and is not used in the visible lines.
// NOTE(review): $ancStack is presumably declared global on a line not visible here.
391 function wgXMLstartElement($parser, $name, $attrs) {
394 $newElem = new element
;
395 $newElem->name
= $name;
396 $newElem->attrs
= $attrs;
398 array_push($ancStack, $newElem);
// End-tag handler: takes the finished element off $ancStack and attaches it to its parent.
// $parser and $name are not used in the visible lines.
401 function wgXMLendElement($parser, $name) {
402 global $ancStack, $rootElem;
403 // pop element off stack
404 $elem = array_pop ($ancStack);
// An empty stack means the popped element had no parent.
// NOTE(review): the body of this branch is not visible - presumably it stores the
// element in $rootElem and the parent handling below is the else branch; confirm.
405 if (count ($ancStack) == 0)
408 // add it to its parent
409 array_push ($ancStack[count($ancStack)-1]->children
, $elem);
// Character-data handler: appends trimmed, non-empty text to the children of the
// element currently on top of $ancStack. Text outside any element is dropped.
// NOTE(review): $ancStack is presumably declared global on a line not visible here.
412 function wgXMLcharacterData($parser, $data) {
414 $data = trim ($data); // Don't add blank lines, they're no use...
415 // add to parent if parent exists
416 if ( $ancStack && $data != "" ) {
417 array_push ($ancStack[count($ancStack)-1]->children
, $data);
423 * Here's the class that generates a nice tree
// Parses the XML file $filename with the wgXML* handlers.
// Declared to return by reference; the return statement is on a line not visible here.
// Terminates the script with die() when the file cannot be opened or the XML is malformed.
// NOTE(review): no fclose($fp) is visible in this method - confirm the handle is closed.
427 function &scanFile( $filename ) {
428 global $ancStack, $rootElem;
431 $xml_parser = xml_parser_create();
432 xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
433 xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');
434 if (!($fp = fopen($filename, 'r'))) {
435 die('could not open XML input');
// Feed the file to the parser in 4096-byte chunks; feof() marks the final chunk
437 while ($data = fread($fp, 4096)) {
438 if (!xml_parse($xml_parser, $data, feof($fp))) {
439 die(sprintf("XML error: %s at line %d",
440 xml_error_string(xml_get_error_code($xml_parser)),
441 xml_get_current_line_number($xml_parser)));
444 xml_parser_free($xml_parser);
446 // return the remaining root element we copied in the beginning
// Parses the XML text $input in one go with the wgXML* handlers.
// Terminates the script with die() when the XML is malformed.
// The return statement is on a line not visible here.
450 function scanString ( $input ) {
451 global $ancStack, $rootElem;
454 $xml_parser = xml_parser_create();
455 xml_set_element_handler ($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
456 xml_set_character_data_handler ($xml_parser, 'wgXMLcharacterData');
// The whole input is passed as the final chunk
458 if (!xml_parse ($xml_parser, $input, true)) {
459 die (sprintf ("XML error: %s at line %d",
460 xml_error_string(xml_get_error_code($xml_parser)),
461 xml_get_current_line_number($xml_parser)));
463 xml_parser_free ($xml_parser);
465 // return the remaining root element we copied in the beginning
# Parser variant that renders wikitext in three steps: an external program converts
# the wikitext to XML, scanString() builds an element tree from it, and the tree's
# makeXHTML() produces the output text.
471 class ParserXML
EXTENDS Parser
477 var $mTagHooks, $mListType;
479 # Cleared with clearState():
480 var $mOutput, $mAutonumber, $mDTopen, $mStripState = array();
481 var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
484 var $mOptions, $mTitle, $mOutputType,
485 $mTemplates, // cache of already loaded templates, avoids
486 // multiple SQL queries for the same string
487 $mTemplatePath; // stores an unsorted hash of all the templates already loaded
488 // in this path. Used for loop detection.
# NOTE(review): element::makeXHTML() reads $parser->nowiki, which is not declared
# here - only $nowikicount is; confirm which name is intended.
490 var $nowikicount , $mCurrentTemplateOptions ;
# Constructor: starts with empty template caches and no tag hooks.
# NOTE(review): this is a PHP 4 style constructor named after the class; PHP 8 no
# longer treats such a method as the constructor - confirm target PHP version.
499 function ParserXML() {
500 $this->mTemplates
= array();
501 $this->mTemplatePath
= array();
502 $this->mTagHooks
= array();
# Resets the per-parse state to its initial values and creates a fresh ParserOutput
511 function clearState() {
512 $this->mOutput
= new ParserOutput
;
513 $this->mAutonumber
= 0;
514 $this->mLastSection
= "";
515 $this->mDTopen
= false;
516 $this->mVariables
= false;
517 $this->mIncludeCount
= array();
518 $this->mStripState
= array();
519 $this->mArgStack
= array();
520 $this->mInPre
= false;
524 * Turns the wikitext into XML by calling the external parser
# $text is replaced in place: it is written to a temporary file, that file is fed to
# the $wgWiki2xml command on standard input, and the output lines are joined with "\n".
# NOTE(review): $wgWiki2xml is presumably declared global on a line not visible here;
# closing and removing the temporary file is not visible either - confirm both.
# NOTE(review): the temp directory is hard-coded to /tmp and the file name is put
# into the shell command without escapeshellarg().
527 function runXMLparser ( &$text ) {
530 $tmpfname = tempnam("/tmp", "FOO");
531 $handle = fopen($tmpfname, "w");
532 fwrite($handle, $text);
534 exec ( $wgWiki2xml . " < " . $tmpfname , $a ) ;
535 $text = implode ( "\n" , $a ) ;
# Converts the wikitext in $text to XHTML in place.
# $inline is true for template content; $templateOptions holds the template variables
# that are in effect while this text is rendered.
539 function plain_parse ( &$text , $inline = false , $templateOptions = array () ) {
540 $this->runXMLparser ( $text ) ;
# NOTE(review): $w is created on a line not visible here
543 $result = $w->scanString( $text );
# Template options are swapped in for this call and restored at the end, so nested
# template parses do not overwrite the options of the outer one
545 $oldTemplateOptions = $this->mCurrentTemplateOptions
;
546 $this->mCurrentTemplateOptions
= $templateOptions ;
548 if ( $inline ) { # Inline rendering off for templates
# A single top-level child is renamed to IGNORE, which makeXHTML() renders without
# a wrapping tag
549 if ( count ( $result->children
) == 1 )
550 $result->children
[0]->name
= "IGNORE" ;
# The condition is the constant 1, so the debugging branch below never runs
553 if ( 1 ) $text = $result->makeXHTML ( $this ) ; # No debugging info
554 else $text = $result->makeXHTML ( $this ) . "<hr>" . $text . "<hr>" . $result->myPrint();
555 $this->mCurrentTemplateOptions
= $oldTemplateOptions ;
# Entry point: renders $text and returns the ParserOutput holding the result.
# $title, $options, $linestart and $clearState are not used in the visible lines.
558 function parse( $text, &$title, $options, $linestart = true, $clearState = true ) {
559 $this->plain_parse ( $text ) ;
560 $this->mOutput
->setText ( $text ) ;
561 return $this->mOutput
;