* Fixed unclosed <p> tag
[lhc/web/wiklou.git] / includes / ParserXML.php
1 <?php
2 /**
3 *
4 * @package MediaWiki
5 * @subpackage Experimental
6 */
7
8 /** */
9 require_once ('Parser.php');
10
11 /**
12 * This should one day become the XML->(X)HTML parser
13 * Based on work by Jan Hidders and Magnus Manske
14 * To use, set
15 * $wgUseXMLparser = true ;
16 * $wgEnableParserCache = false ;
17 * $wgWiki2xml to the path and executable of the command line version (cli)
18 * in LocalSettings.php
19 * @package MediaWiki
20 * @subpackage Experimental
21 */
22
/**
 * One node of the wikiXML tree built from the output of the wiki2xml parser.
 *
 * A node has a tag name, the XML attributes it was opened with, and an
 * ordered list of children. Each child is either a plain string (character
 * data) or another element object. makeXHTML() renders a node and its whole
 * subtree as XHTML.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class element {
	var $name = '';
	var $attrs = array ();
	var $children = array ();

	/**
	 * Returns the attribute string contributed by the ATTRS child of this node.
	 *
	 * If the node has several ATTRS children, the last one wins.
	 *
	 * @return string Attributes in the form name='value' joined by blanks, or ''
	 */
	function getSourceAttrs() {
		$ret = '';
		foreach ($this->children as $child) {
			if (!is_string($child) AND $child->name == 'ATTRS') {
				# Ask the child directly. Going through makeXHTML() required a
				# parser object that does not exist in this scope.
				$ret = $child->getTheseAttrs();
			}
		}
		return $ret;
	}

	/**
	 * Collects the ATTR children of an ATTRS node for getSourceAttrs().
	 *
	 * Only the first child of each ATTR node is used as the value. ATTR nodes
	 * without a NAME attribute are skipped.
	 *
	 * @return string Attributes in the form name='value' joined by blanks
	 */
	function getTheseAttrs() {
		$ret = array ();
		foreach ($this->children as $child) {
			if (is_string($child) OR $child->name != 'ATTR') {
				continue;
			}
			if (!isset ($child->attrs['NAME'])) {
				continue;
			}
			$value = '';
			if (isset ($child->children[0]) AND is_string($child->children[0])) {
				$value = $child->children[0];
			}
			# The value is emitted between single quotes, so a literal quote
			# inside it must not be able to terminate the attribute.
			# NOTE(review): attribute names and the other HTML special
			# characters are passed through unchanged - confirm that the
			# wiki2xml output can be trusted for those.
			$value = str_replace("'", '&#39;', $value);
			$ret[] = $child->attrs['NAME']."='".$value."'";
		}
		return implode(' ', $ret);
	}

	/**
	 * Moves the lower-case letters that directly follow a LINK child into the
	 * display text of that link.
	 *
	 * Nothing happens unless child $key is a LINK element and child $key+1 is
	 * a string. Currently not called from within this class (see the
	 * commented-out loop in sub_makeXHTML()).
	 *
	 * @param object $parser Parser object handed through to sub_makeXHTML()
	 * @param int $key Index of the child to inspect
	 */
	function fixLinkTails(& $parser, $key) {
		$k2 = $key +1;
		if (!isset ($this->children[$k2]) OR !is_string($this->children[$k2]))
			return;
		if (is_string($this->children[$key]) OR $this->children[$key]->name != "LINK")
			return;

		# Split the leading run of "tail" characters off the following text.
		# NOTE(review): $n[0] is a single byte, so the umlaut comparisons can
		# only match if this file and the text use a single-byte encoding.
		$n = $this->children[$k2];
		$s = '';
		while ($n != '' AND (($n[0] >= 'a' AND $n[0] <= 'z') OR $n[0] == 'ä' OR $n[0] == 'ö' OR $n[0] == 'ü' OR $n[0] == 'ß')) {
			$s .= $n[0];
			$n = substr($n, 1);
		}
		$this->children[$k2] = $n;

		if (count($this->children[$key]->children) > 1) {
			# The link already has an option: append the tail to the last one
			$kl = array_keys($this->children[$key]->children);
			$kl = array_pop($kl);
			$this->children[$key]->children[$kl]->children[] = $s;
		} else {
			# Target only: create a display text made of target plus tail
			$e = new element;
			$e->name = "LINKOPTION";
			$t = $this->children[$key]->sub_makeXHTML($parser);
			$e->children[] = trim($t).$s;
			$this->children[$key]->children[] = $e;
		}
	}

	/**
	 * Generates the XHTML for all children of this node, optionally wrapped
	 * in an HTML tag.
	 *
	 * @param object $parser Parser object passed on to the children
	 * @param string $tag HTML tag name to wrap the output in; '' for no wrapper
	 * @param string $attr Extra attributes for the tag; the attributes from the
	 *                     ATTRS child are appended to these
	 * @return string
	 */
	function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
		$ret = '';

		$attr2 = $this->getSourceAttrs();
		if ($attr != '' AND $attr2 != '')
			$attr .= ' ';
		$attr .= $attr2;

		if ($tag != '') {
			$ret .= '<'.$tag;
			if ($attr != '')
				$ret .= ' '.$attr;
			$ret .= '>';
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		# foreach ( array_keys ( $this->children ) AS $x )
		# $this->fixLinkTails ( $parser , $x ) ;

		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= $child;
			} elseif ($child->name != 'ATTRS') {
				# ATTRS was already consumed by getSourceAttrs() above
				$ret .= $child->makeXHTML($parser);
			}
		}
		if ($tag != '')
			$ret .= '</'.$tag.">\n";
		return $ret;
	}

	/**
	 * Renders an internal link.
	 *
	 * Targets that contain a colon and resolve to the image namespace are
	 * handed to $parser->makeImage(), unless the node carries
	 * FORCEDLINK="yes". Everything else becomes an ordinary skin link;
	 * interlanguage and other namespace handling is not implemented yet.
	 *
	 * @param object $parser Parser object providing makeImage()
	 * @param string $target Link target
	 * @param string $display_title Text to display for the link
	 * @param array $options Remaining link options (used for images only)
	 * @return string
	 */
	function createInternalLink(& $parser, $target, $display_title, $options) {
		global $wgUser;

		$forced = isset ($this->attrs['FORCEDLINK']) AND strtoupper($this->attrs['FORCEDLINK']) == 'YES';

		if (!$forced AND count(explode(':', $target)) > 1) {
			# There's stuff missing here...
			$nt = Title :: newFromText($target);
			# Anything that is not an object (invalid target) falls back to a plain link
			if (is_object($nt) AND $nt->getNamespace() == NS_IMAGE) {
				$options[] = $display_title;
				return $parser->makeImage($nt, implode('|', $options));
			}
		}

		$skin = $wgUser->getSkin();
		return $skin->makeLink($target, $display_title);
	}

	/**
	 * Renders a LINK node: the LINKTARGET child supplies the target, all
	 * other element children are options, the last of which is the display
	 * text. Without options the target doubles as display text.
	 *
	 * @param object $parser Parser object passed on to the children
	 * @return string
	 */
	function makeInternalLink(& $parser) {
		$target = '';
		$option = array ();
		foreach ($this->children as $child) {
			if (is_string($child)) {
				# This shouldn't be the case!
				continue;
			}
			if ($child->name == 'LINKTARGET') {
				$target = trim($child->makeXHTML($parser));
			} else {
				$option[] = trim($child->makeXHTML($parser));
			}
		}

		if (count($option) == 0)
			$option[] = $target; # Create dummy display title
		$display_title = array_pop($option);
		return $this->createInternalLink($parser, $target, $display_title, $option);
	}

	/**
	 * Loads a template page and renders it with the given arguments.
	 *
	 * @param string $title Template title; the template namespace is
	 *                      prepended when the title contains no colon
	 * @param array $parts Template arguments, either "value" (numbered from
	 *                     1 by position) or "key=value"
	 * @param object $parser Parser object providing plain_parse()
	 * @return string Rendered template, a broken link if the page does not
	 *                exist, or the title in double braces if it is invalid
	 */
	function getTemplateXHTML($title, $parts, & $parser) {
		global $wgLang, $wgUser;
		$skin = $wgUser->getSkin();
		$ot = $title; # Original title
		if (strpos($title, ':') === false)
			$title = $wgLang->getNsText(NS_TEMPLATE).":".$title;
		$nt = Title :: newFromText($title);
		if (!is_object($nt)) {
			# Not a usable title: show the call instead of dying on a non-object
			return '{{'.$ot.'}}';
		}
		if ($nt->getArticleID() == 0) {
			# No/non-existing page
			return $skin->makeBrokenLink($title, $ot);
		}

		$a = 0;
		$tv = array (); # Template variables
		foreach ($parts AS $part) {
			$a ++;
			$x = explode('=', $part, 2);
			if (count($x) == 1)
				$key = "{$a}";
			else
				$key = $x[0];
			$tv[$key] = array_pop($x);
		}
		$art = new Article($nt);
		$text = $art->getContent(false);
		$parser->plain_parse($text, true, $tv);

		return $text;
	}

	/**
	 * Converts this wikiXML node and its subtree into XHTML.
	 *
	 * EXTENSION nodes whose NAME is a known HTML tag are treated like the
	 * corresponding wikiXML node. The result is wrapped in newlines, with
	 * doubled newlines collapsed once. ATTRS nodes are the exception: they
	 * return the bare attribute string.
	 *
	 * @param object $parser Parser object; its nowiki, mListType and
	 *                       mCurrentTemplateOptions members are used here
	 * @return string
	 */
	function makeXHTML(& $parser) {
		# HTML-style extension tags that have a wikiXML equivalent
		static $htmlTags = array (
			'B' => 'BOLD',
			'STRONG' => 'BOLD',
			'I' => 'ITALICS',
			'EM' => 'ITALICS',
			'U' => 'UNDERLINED', # Hey, virtual wiki tag! ;-)
			'S' => 'STRIKE',
			'P' => 'PARAGRAPH',
			'TABLE' => 'TABLE',
			'TR' => 'TABLEROW',
			'TD' => 'TABLECELL',
			'TH' => 'TABLEHEAD',
			'CAPTION' => 'CAPTION',
			'NOWIKI' => 'NOWIKI'
		);

		$ret = '';
		$n = $this->name; # Shortcut

		if ($n == 'EXTENSION') {
			# Fix allowed HTML
			$ext = isset ($this->attrs['NAME']) ? strtoupper($this->attrs['NAME']) : '';
			if (isset ($htmlTags[$ext])) {
				$n = $htmlTags[$ext];
				unset ($this->attrs['NAME']); # Cleanup
			} elseif (isset ($parser->nowiki) AND $parser->nowiki > 0) {
				# No 'real' wiki tags allowed in nowiki section
				# NOTE(review): this only affects extensions that were NOT
				# mapped above - confirm that this is the intended condition.
				$n = '';
			}
		}

		switch($n) {
			case 'ARTICLE':
				$ret .= $this->sub_makeXHTML($parser);
				break;
			case 'HEADING':
				$ret .= $this->sub_makeXHTML($parser, 'h'.$this->attrs['LEVEL']);
				break;
			case 'PARAGRAPH':
				$ret .= $this->sub_makeXHTML($parser, 'p');
				break;
			case 'BOLD':
				$ret .= $this->sub_makeXHTML($parser, 'strong');
				break;
			case 'ITALICS':
				$ret .= $this->sub_makeXHTML($parser, 'em');
				break;

			# These don't exist as wiki markup
			case 'UNDERLINED':
				$ret .= $this->sub_makeXHTML($parser, 'u');
				break;
			case 'STRIKE':
				$ret .= $this->sub_makeXHTML($parser, 'strike');
				break;

			# HTML comment
			case 'COMMENT':
				# Comments are parsed out
				break;

			# Links
			case 'LINK':
				$ret .= $this->makeInternalLink($parser);
				break;
			case 'LINKTARGET':
			case 'LINKOPTION':
				$ret .= $this->sub_makeXHTML($parser);
				break;

			case 'TEMPLATE':
				$parts = explode('|', $this->sub_makeXHTML($parser));
				$title = array_shift($parts);
				# The callee declares $parser as a reference parameter already;
				# call-time pass-by-reference is a fatal error since PHP 5.4.
				$ret .= $this->getTemplateXHTML($title, $parts, $parser);
				break;

			case 'TEMPLATEVAR':
				$x = $this->sub_makeXHTML($parser);
				if (isset ($parser->mCurrentTemplateOptions["{$x}"]))
					$ret .= $parser->mCurrentTemplateOptions["{$x}"];
				break;

			# Internal use, not generated by wiki2xml parser
			case 'IGNORE':
				$ret .= $this->sub_makeXHTML($parser);
				break; # was missing: content got rendered again by the cases below

			case 'NOWIKI':
				# Count the nesting depth so that children know they are inside <nowiki>
				$parser->nowiki = (isset ($parser->nowiki) ? $parser->nowiki : 0) + 1;
				$ret .= $this->sub_makeXHTML($parser, '');
				$parser->nowiki--;
				break; # was missing: fell through into the EXTENSION dummy

			# Unknown HTML extension
			case 'EXTENSION': # This is currently a dummy!!!
				$ext = isset ($this->attrs['NAME']) ? $this->attrs['NAME'] : '';

				$ret .= '&lt;'.$ext.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$ext.'&gt; ';
				break;

			# Table stuff
			case 'TABLE':
				$ret .= $this->sub_makeXHTML($parser, 'table');
				break;
			case 'TABLEROW':
				$ret .= $this->sub_makeXHTML($parser, 'tr');
				break;
			case 'TABLECELL':
				$ret .= $this->sub_makeXHTML($parser, 'td');
				break;
			case 'TABLEHEAD':
				$ret .= $this->sub_makeXHTML($parser, 'th');
				break;
			case 'CAPTION':
				$ret .= $this->sub_makeXHTML($parser, 'caption');
				break;
			case 'ATTRS': # SPECIAL CASE : returning attributes
				return $this->getTheseAttrs();

			# Lists stuff
			case 'LISTITEM':
				if ($parser->mListType == 'dl')
					$ret .= $this->sub_makeXHTML($parser, 'dd');
				else
					$ret .= $this->sub_makeXHTML($parser, 'li');
				break;
			case 'LIST':
				$listKind = isset ($this->attrs['TYPE']) ? $this->attrs['TYPE'] : '';
				$type = 'ol'; # Default
				if ($listKind == 'bullet')
					$type = 'ul';
				elseif ($listKind == 'indent')
					$type = 'dl';
				# Items look at the parser to find out which list they are in;
				# restore the outer type afterwards so nested lists work.
				$oldtype = $parser->mListType;
				$parser->mListType = $type;
				$ret .= $this->sub_makeXHTML($parser, $type);
				$parser->mListType = $oldtype;
				break;

			# Something else entirely
			default:
				$ret .= '&lt;'.$n.'&gt;';
				$ret .= $this->sub_makeXHTML($parser);
				$ret .= '&lt;/'.$n.'&gt; ';
		} // switch($n)

		$ret = "\n{$ret}\n";
		$ret = str_replace("\n\n", "\n", $ret);
		return $ret;
	}

	/**
	 * Debugging aid: dumps this node and its subtree as nested HTML lists.
	 * Names, attributes and text are written out without escaping.
	 *
	 * @return string
	 */
	function myPrint() {
		$ret = "<ul>\n";
		$ret .= "<li> <b> Name: </b> $this->name </li>\n";
		// print attributes
		$ret .= '<li> <b> Attributes: </b>';
		foreach ($this->attrs as $name => $value) {
			$ret .= "$name => $value; ";
		}
		$ret .= " </li>\n";
		// print children
		foreach ($this->children as $child) {
			if (is_string($child)) {
				$ret .= "<li> $child </li>\n";
			} else {
				$ret .= $child->myPrint();
			}
		}
		$ret .= "</ul>\n";
		return $ret;
	}
}
431
// Stack of the elements that are currently open while an XML document is
// being parsed; the innermost element is the last entry.
$ancStack = array ();

// START Three global functions needed for parsing, sorry guys
/**
 * XML parser callback for an opening tag: creates a new element node from
 * the tag name and attributes and puts it on top of the ancestor stack.
 *
 * @param mixed $parser XML parser that triggered the callback (unused)
 * @param string $name Tag name
 * @param array $attrs Attributes of the tag
 */
function wgXMLstartElement($parser, $name, $attrs) {
	global $ancStack;

	$node = new element;
	$node->attrs = $attrs;
	$node->name = $name;

	array_push($ancStack, $node);
}
445
/**
 * XML parser callback for a closing tag: takes the finished element off the
 * ancestor stack and appends it to the children of its parent. An element
 * without a parent becomes the global root element.
 *
 * @param mixed $parser XML parser that triggered the callback (unused)
 * @param string $name Tag name (unused)
 */
function wgXMLendElement($parser, $name) {
	global $ancStack, $rootElem;

	$closed = array_pop($ancStack);
	$depth = count($ancStack);
	if ($depth > 0) {
		// whatever is on top of the stack now is the parent
		array_push($ancStack[$depth - 1]->children, $closed);
		return;
	}
	// nothing left on the stack: this was the outermost element
	$rootElem = $closed;
}
457
/**
 * XML parser callback for character data: appends the trimmed text to the
 * children of the element on top of the ancestor stack. Text that is empty
 * after trimming, or that arrives while no element is open, is dropped.
 *
 * @param mixed $parser XML parser that triggered the callback (unused)
 * @param string $data Character data
 */
function wgXMLcharacterData($parser, $data) {
	global $ancStack;

	// Blank lines are of no use, so they never make it into the tree
	$text = trim($data);
	if ($text == "" OR !$ancStack) {
		return;
	}
	$top = count($ancStack) - 1;
	array_push($ancStack[$top]->children, $text);
}
467 // END Three global functions needed for parsing, sorry guys
468
/**
 * Turns an XML document into a tree of element objects.
 *
 * The tree is assembled by the global callbacks wgXMLstartElement(),
 * wgXMLendElement() and wgXMLcharacterData(), which communicate through the
 * globals $ancStack and $rootElem. Both scan methods terminate the script
 * with die() if the input cannot be read or is not well-formed.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class xml2php {

	/**
	 * Resets the global parsing state and creates an XML parser that is
	 * wired to the three global callbacks.
	 *
	 * @return mixed The XML parser as returned by xml_parser_create()
	 * @access private
	 */
	function createParser() {
		global $ancStack, $rootElem;
		$ancStack = array ();
		// Do not hand out the tree of an earlier document by accident
		$rootElem = null;

		$xml_parser = xml_parser_create();
		xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
		xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
		return $xml_parser;
	}

	/**
	 * Formats the current error of the given XML parser.
	 *
	 * @param mixed $xml_parser
	 * @return string
	 * @access private
	 */
	function errorMessage($xml_parser) {
		return sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser));
	}

	/**
	 * Parses an XML file in chunks of 4096 bytes.
	 *
	 * @param string $filename Path of the file to read
	 * @return object Root element of the document
	 */
	function & scanFile($filename) {
		global $rootElem;

		if (!($fp = fopen($filename, 'r'))) {
			die('could not open XML input');
		}
		$xml_parser = $this->createParser();

		// Loop on the end-of-input state instead of on the data itself: a
		// chunk such as "0" is falsy, and the parser must always be told
		// which chunk is the last one.
		$is_final = false;
		while (!$is_final) {
			$data = fread($fp, 4096);
			if ($data === false) {
				$data = '';
			}
			$is_final = feof($fp) || $data === '';
			if (!xml_parse($xml_parser, $data, $is_final)) {
				fclose($fp);
				die($this->errorMessage($xml_parser));
			}
		}
		fclose($fp);
		xml_parser_free($xml_parser);

		// return the root element stored by the last wgXMLendElement() call
		return $rootElem;
	}

	/**
	 * Parses an XML document that is held in a string.
	 *
	 * @param string $input The complete XML document
	 * @return object Root element of the document
	 */
	function scanString($input) {
		global $rootElem;

		$xml_parser = $this->createParser();
		if (!xml_parse($xml_parser, $input, true)) {
			die($this->errorMessage($xml_parser));
		}
		xml_parser_free($xml_parser);

		// return the root element stored by the last wgXMLendElement() call
		return $rootElem;
	}

}
517
/**
 * Experimental parser that renders wikitext by way of an XML tree.
 *
 * The text is piped through two external programs (html2xml, then the
 * program configured in $wgWiki2xml), the resulting XML is read into a tree
 * of element objects, and that tree renders itself as XHTML.
 *
 * @package MediaWiki
 * @subpackage Experimental
 */
class ParserXML extends Parser {
	/**#@+
	 * @access private
	 */
	# Persistent:
	var $mTagHooks, $mListType;

	# Cleared with clearState():
	var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
	var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;

	# Temporary:
	var $mOptions, $mTitle, $mOutputType, $mTemplates, // cache of already loaded templates, avoids
	// multiple SQL queries for the same string
	$mTemplatePath; // stores an unsorted hash of all the templates already loaded
	// in this path. Used for loop detection.

	var $nowikicount, $mCurrentTemplateOptions;

	# Nesting depth of NOWIKI sections; maintained by element::makeXHTML()
	var $nowiki = 0;

	/**#@-*/

	/**
	 * Constructor
	 *
	 * @access public
	 */
	function __construct() {
		$this->mTemplates = array ();
		$this->mTemplatePath = array ();
		$this->mTagHooks = array ();
		$this->mCurrentTemplateOptions = array ();
		$this->clearState();
	}

	/**
	 * Old-style constructor, kept so that PHP 4 and explicit calls still
	 * work. PHP 8 no longer treats a method named after its class as the
	 * constructor.
	 *
	 * @access public
	 */
	function ParserXML() {
		$this->__construct();
	}

	/**
	 * Clear Parser state
	 *
	 * @access private
	 */
	function clearState() {
		$this->mOutput = new ParserOutput;
		$this->mAutonumber = 0;
		$this->mLastSection = "";
		$this->mDTopen = false;
		$this->mVariables = false;
		$this->mIncludeCount = array ();
		$this->mStripState = array ();
		$this->mArgStack = array ();
		$this->mInPre = false;
		$this->nowiki = 0;
	}

	/**
	 * Feeds a string to an external command through a temporary file and
	 * returns what the command wrote to standard output.
	 *
	 * @param string $command Command line; taken from the configuration and
	 *                        used as it is
	 * @param string $input Data for the standard input of the command
	 * @return string Output lines joined by "\n"; '' if the temporary file
	 *                could not be written
	 * @access private
	 */
	function pipeThroughCommand($command, $input) {
		$tmpfname = tempnam( wfTempDir(), 'FOO' );
		if ($tmpfname === false) {
			return '';
		}
		$lines = array ();
		$handle = fopen($tmpfname, 'w');
		if ($handle) {
			fwrite($handle, $input);
			fclose($handle);
			exec($command.' < '.escapeshellarg($tmpfname), $lines);
		}
		unlink($tmpfname);
		return implode("\n", $lines);
	}

	/**
	 * Converts the HTML inside the wikitext to XML by calling the external
	 * html2xml program, which is expected in the same directory as the
	 * program configured in $wgWiki2xml.
	 *
	 * @param string $text Text to convert; replaced by the converted text
	 */
	function html2xml(& $text) {
		global $wgWiki2xml;

		# generating html2xml command path
		$a = explode('/', $wgWiki2xml);
		array_pop($a);
		$a[] = 'html2xml';
		$html2xml = implode('/', $a);

		$text = utf8_decode($this->pipeThroughCommand($html2xml, utf8_encode($text)));
	}

	/**
	 * Turns the wikitext into XML: first html2xml(), then the external
	 * program configured in $wgWiki2xml.
	 *
	 * @param string $text Text to convert; replaced by the resulting XML
	 */
	function runXMLparser(& $text) {
		global $wgWiki2xml;

		$this->html2xml($text);
		$text = utf8_decode($this->pipeThroughCommand($wgWiki2xml, $text));
	}

	/**
	 * Renders wikitext as XHTML in place.
	 *
	 * Called for whole pages by parse() and again by
	 * element::getTemplateXHTML() for every template, which is why the
	 * template options in effect are saved and restored around the rendering.
	 *
	 * @param string $text Wikitext; replaced by the generated XHTML
	 * @param bool $inline If true and the document consists of a single
	 *                     element, that element is rendered without a
	 *                     wrapper of its own
	 * @param array $templateOptions Values for the template variables
	 */
	function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
		$this->runXMLparser($text);
		$w = new xml2php;
		$result = $w->scanString($text);
		if (!is_object($result)) {
			$text = '';
			return;
		}

		$oldTemplateOptions = $this->mCurrentTemplateOptions;
		$this->mCurrentTemplateOptions = $templateOptions;

		# Inline rendering off for templates; a lone text child has no name to change
		if ($inline AND count($result->children) == 1 AND is_object($result->children[0])) {
			$result->children[0]->name = 'IGNORE';
		}

		# For debugging, append '<hr>'.$text.'<hr>'.$result->myPrint()
		$text = $result->makeXHTML($this);
		$this->mCurrentTemplateOptions = $oldTemplateOptions;
	}

	/**
	 * Renders the wikitext of a page.
	 *
	 * @param string $text Wikitext
	 * @param object $title Title of the page
	 * @param object $options Parser options
	 * @param bool $linestart Not used by this parser
	 * @param bool $clearState Whether to reset the parser state first
	 * @return object The ParserOutput holding the generated XHTML
	 */
	function parse($text, & $title, $options, $linestart = true, $clearState = true) {
		if ($clearState) {
			$this->clearState();
		}
		$this->mOptions = $options;
		$this->mTitle = & $title;

		$this->plain_parse($text);
		$this->mOutput->setText($text);
		return $this->mOutput;
	}

}
642 ?>